コード例 #1
0
    def _start_native_worker(self, wtype, id, options=None, details=None):

        assert (wtype in ['router', 'container', 'websocket-testee'])

        # prohibit starting a worker twice
        #
        if id in self._workers:
            emsg = "Could not start worker: a worker with ID '{}' is already running (or starting)".format(
                id)
            self.log.error(emsg)
            raise ApplicationError(u'crossbar.error.worker_already_running',
                                   emsg)

        # check worker options
        #
        options = options or {}
        try:
            if wtype == 'router':
                checkconfig.check_router_options(options)
            elif wtype == 'container':
                checkconfig.check_container_options(options)
            elif wtype == 'websocket-testee':
                checkconfig.check_websocket_testee_options(options)
            else:
                raise Exception("logic error")
        except Exception as e:
            emsg = "Could not start native worker: invalid configuration ({})".format(
                e)
            self.log.error(emsg)
            raise ApplicationError(u'crossbar.error.invalid_configuration',
                                   emsg)

        # allow override Python executable from options
        #
        if 'python' in options:
            exe = options['python']

            # the executable must be an absolute path, e.g. /home/oberstet/pypy-2.2.1-linux64/bin/pypy
            #
            if not os.path.isabs(exe):
                emsg = "Invalid worker configuration: python executable '{}' must be an absolute path".format(
                    exe)
                self.log.error(emsg)
                raise ApplicationError(u'crossbar.error.invalid_configuration',
                                       emsg)

            # of course the path must exist and actually be executable
            #
            if not (os.path.isfile(exe) and os.access(exe, os.X_OK)):
                emsg = "Invalid worker configuration: python executable '{}' does not exist or isn't an executable".format(
                    exe)
                self.log.error(emsg)
                raise ApplicationError(u'crossbar.error.invalid_configuration',
                                       emsg)
        else:
            exe = sys.executable

        # all native workers (routers and containers for now) start from the same script
        #
        filename = os.path.abspath(
            os.path.join(crossbar.__file__, "..", "worker", "process.py"))

        # assemble command line for forking the worker
        #
        args = [exe, "-u", filename]
        args.extend(["--cbdir", self._node._cbdir])
        args.extend(["--node", str(self._node_id)])
        args.extend(["--worker", str(id)])
        args.extend(["--realm", self._realm])
        args.extend(["--type", wtype])
        args.extend(["--loglevel", _loglevel])

        # allow override worker process title from options
        #
        if options.get('title', None):
            args.extend(['--title', options['title']])

        # forward explicit reactor selection
        #
        if 'reactor' in options and sys.platform in options['reactor']:
            args.extend(['--reactor', options['reactor'][sys.platform]])
        # FIXME
        # elif self._node.options.reactor:
        #    args.extend(['--reactor', self._node.options.reactor])

        # create worker process environment
        #
        worker_env = create_process_env(options)

        # We need to use the same PYTHONPATH we were started with, so we can
        # find the Crossbar we're working with -- it may not be the same as the
        # one on the default path
        worker_env["PYTHONPATH"] = os.pathsep.join(sys.path)

        # log name of worker
        #
        worker_logname = {
            'router': 'Router',
            'container': 'Container',
            'websocket-testee': 'WebSocketTestee'
        }.get(wtype, 'Worker')

        # topic URIs used (later)
        #
        if wtype == 'router':
            starting_topic = 'crossbar.node.{}.on_router_starting'.format(
                self._node_id)
            started_topic = 'crossbar.node.{}.on_router_started'.format(
                self._node_id)
        elif wtype == 'container':
            starting_topic = 'crossbar.node.{}.on_container_starting'.format(
                self._node_id)
            started_topic = 'crossbar.node.{}.on_container_started'.format(
                self._node_id)
        elif wtype == 'websocket-testee':
            starting_topic = 'crossbar.node.{}.on_websocket_testee_starting'.format(
                self._node_id)
            started_topic = 'crossbar.node.{}.on_websocket_testee_started'.format(
                self._node_id)
        else:
            raise Exception("logic error")

        # add worker tracking instance to the worker map ..
        #
        if wtype == 'router':
            worker = RouterWorkerProcess(self,
                                         id,
                                         details.caller,
                                         keeplog=options.get(
                                             'traceback', None))
        elif wtype == 'container':
            worker = ContainerWorkerProcess(self,
                                            id,
                                            details.caller,
                                            keeplog=options.get(
                                                'traceback', None))
        elif wtype == 'websocket-testee':
            worker = WebSocketTesteeWorkerProcess(self,
                                                  id,
                                                  details.caller,
                                                  keeplog=options.get(
                                                      'traceback', None))
        else:
            raise Exception("logic error")

        self._workers[id] = worker

        # create a (custom) process endpoint.
        #
        if platform.isWindows():
            childFDs = None  # Use the default Twisted ones
        else:
            # The communication between controller and container workers is
            # using WAMP running over 2 pipes.
            # For controller->container traffic this runs over FD 0 (`stdin`)
            # and for the container->controller traffic, this runs over FD 3.
            #
            # Note: We use FD 3, not FD 1 (`stdout`) or FD 2 (`stderr`) for
            # container->controller traffic, so that components running in the
            # container which happen to write to `stdout` or `stderr` do not
            # interfere with the container-controller communication.
            childFDs = {0: "w", 1: "r", 2: "r", 3: "r"}

        ep = WorkerProcessEndpoint(self._node._reactor,
                                   exe,
                                   args,
                                   env=worker_env,
                                   worker=worker,
                                   childFDs=childFDs)

        # ready handling
        #
        def on_ready_success(id):
            self.log.info("{worker} with ID '{id}' and PID {pid} started",
                          worker=worker_logname,
                          id=worker.id,
                          pid=worker.pid)

            self._node._reactor.addSystemEventTrigger(
                'before',
                'shutdown',
                self._cleanup_worker,
                self._node._reactor,
                worker,
            )

            worker.status = 'started'
            worker.started = datetime.utcnow()

            started_info = {
                'id': worker.id,
                'status': worker.status,
                'started': utcstr(worker.started),
                'who': worker.who
            }

            # FIXME: make start of stats printer dependent on log level ..
            worker.log_stats(5.)

            self.publish(started_topic,
                         started_info,
                         options=PublishOptions(exclude=[details.caller]))

            return started_info

        def on_ready_error(err):
            del self._workers[worker.id]
            emsg = 'Failed to start native worker: {}'.format(err.value)
            self.log.error(emsg)
            raise ApplicationError(u"crossbar.error.cannot_start", emsg,
                                   worker.getlog())

        worker.ready.addCallbacks(on_ready_success, on_ready_error)

        def on_exit_success(_):
            self.log.info("Node worker {} ended successfully".format(
                worker.id))
            worker.log_stats(0)
            del self._workers[worker.id]
            return True

        def on_exit_error(err):
            self.log.info("Node worker {} ended with error ({})".format(
                worker.id, err))
            worker.log_stats(0)
            del self._workers[worker.id]
            return False

        def check_for_shutdown(was_successful):
            shutdown = False

            # automatically shutdown node whenever a worker ended (successfully, or with error)
            #
            if checkconfig.NODE_SHUTDOWN_ON_WORKER_EXIT in self._node._node_shutdown_triggers:
                self.log.info(
                    "Node worker ended, and trigger '{}' active".format(
                        checkconfig.NODE_SHUTDOWN_ON_WORKER_EXIT))
                shutdown = True

            # automatically shutdown node when worker ended with error
            #
            if not was_successful and checkconfig.NODE_SHUTDOWN_ON_WORKER_EXIT_WITH_ERROR in self._node._node_shutdown_triggers:
                self.log.info(
                    "Node worker ended with error, and trigger '{}' active".
                    format(
                        checkconfig.NODE_SHUTDOWN_ON_WORKER_EXIT_WITH_ERROR))
                shutdown = True

            # automatically shutdown node when no more workers are left
            #
            if len(
                    self._workers
            ) == 0 and checkconfig.NODE_SHUTDOWN_ON_LAST_WORKER_EXIT in self._node._node_shutdown_triggers:
                self.log.info(
                    "No more node workers running, and trigger '{}' active".
                    format(checkconfig.NODE_SHUTDOWN_ON_LAST_WORKER_EXIT))
                shutdown = True

            # initiate shutdown (but only if we are not already shutting down)
            #
            if shutdown:
                if not self._shutdown_requested:
                    self.log.info("Node shutting down ..")
                    self.shutdown()
                else:
                    # ignore: shutdown already initiated ..
                    self.log.info("Node is already shutting down.")
            else:
                self.log.info(
                    "Node will continue to run (node shutdown triggers active: {})"
                    .format(self._node._node_shutdown_triggers))

        d_on_exit = worker.exit.addCallbacks(on_exit_success, on_exit_error)
        d_on_exit.addBoth(check_for_shutdown)

        # create a transport factory for talking WAMP to the native worker
        #
        transport_factory = create_native_worker_client_factory(
            self._node._router_session_factory, worker.ready, worker.exit)
        transport_factory.noisy = False
        self._workers[id].factory = transport_factory

        # now (immediately before actually forking) signal the starting of the worker
        #
        starting_info = {
            'id': id,
            'status': worker.status,
            'created': utcstr(worker.created),
            'who': worker.who
        }

        # the caller gets a progressive result ..
        if details.progress:
            details.progress(starting_info)

        # .. while all others get an event
        self.publish(starting_topic,
                     starting_info,
                     options=PublishOptions(exclude=[details.caller]))

        # now actually fork the worker ..
        #
        self.log.info("Starting {worker} with ID '{id}'...",
                      worker=worker_logname,
                      id=id)
        self.log.debug("{worker} '{id}' command line is '{cmdline}'",
                       worker=worker_logname,
                       id=id,
                       cmdline=' '.join(args))

        d = ep.connect(transport_factory)

        def on_connect_success(proto):

            # this seems to be called immediately when the child process
            # has been forked. even if it then immediately fails because
            # e.g. the executable doesn't even exist. in other words,
            # I'm not sure under what conditions the deferred will errback ..

            pid = proto.transport.pid
            self.log.debug("Native worker process connected with PID {pid}",
                           pid=pid)

            # note the PID of the worker
            worker.pid = pid

            # proto is an instance of NativeWorkerClientProtocol
            worker.proto = proto

            worker.status = 'connected'
            worker.connected = datetime.utcnow()

        def on_connect_error(err):

            # not sure when this errback is triggered at all ..
            self.log.error(
                "Interal error: connection to forked native worker failed ({err})",
                err=err)

            # in any case, forward the error ..
            worker.ready.errback(err)

        d.addCallbacks(on_connect_success, on_connect_error)

        return worker.ready
コード例 #2
0
    def start_guest(self, id, config, details=None):
        """
        Start a new guest process on this node.

        :param config: The guest process configuration.
        :type config: obj

        :returns: int -- The PID of the new process.
        """
        # prohibit starting a worker twice
        #
        if id in self._workers:
            emsg = "Could not start worker: a worker with ID '{}' is already running (or starting)".format(
                id)
            self.log.error(emsg)
            raise ApplicationError(u'crossbar.error.worker_already_running',
                                   emsg)

        try:
            checkconfig.check_guest(config)
        except Exception as e:
            raise ApplicationError(
                u'crossbar.error.invalid_configuration',
                'invalid guest worker configuration: {}'.format(e))

        options = config.get('options', {})

        # guest process working directory
        #
        workdir = self._node._cbdir
        if 'workdir' in options:
            workdir = os.path.join(workdir, options['workdir'])
        workdir = os.path.abspath(workdir)

        # guest process executable and command line arguments
        #

        # first try to configure the fully qualified path for the guest
        # executable by joining workdir and configured exectuable ..
        exe = os.path.abspath(os.path.join(workdir, config['executable']))

        if check_executable(exe):
            self.log.info(
                "Using guest worker executable '{exe}' (executable path taken from configuration)",
                exe=exe)
        else:
            # try to detect the fully qualified path for the guest
            # executable by doing a "which" on the configured executable name
            exe = shutil.which(config['executable'])
            if exe is not None and check_executable(exe):
                self.log.info(
                    "Using guest worker executable '{exe}' (executable path detected from environment)",
                    exe=exe)
            else:
                emsg = "Could not start worker: could not find and executable for '{}'".format(
                    config['executable'])
                self.log.error(emsg)
                raise ApplicationError(u'crossbar.error.invalid_configuration',
                                       emsg)

        # guest process command line arguments
        #
        args = [exe]
        args.extend(config.get('arguments', []))

        # guest process environment
        #
        worker_env = create_process_env(options)

        # log name of worker
        #
        worker_logname = 'Guest'

        # topic URIs used (later)
        #
        starting_topic = 'crossbar.node.{}.on_guest_starting'.format(
            self._node_id)
        started_topic = 'crossbar.node.{}.on_guest_started'.format(
            self._node_id)

        # add worker tracking instance to the worker map ..
        #
        worker = GuestWorkerProcess(self,
                                    id,
                                    details.caller,
                                    keeplog=options.get('traceback', None))

        self._workers[id] = worker

        # create a (custom) process endpoint
        #
        ep = WorkerProcessEndpoint(self._node._reactor,
                                   exe,
                                   args,
                                   path=workdir,
                                   env=worker_env,
                                   worker=worker)

        # ready handling
        #
        def on_ready_success(proto):

            worker.pid = proto.transport.pid
            worker.status = 'started'
            worker.started = datetime.utcnow()

            self.log.info("{worker} with ID '{id}' and PID {pid} started",
                          worker=worker_logname,
                          id=worker.id,
                          pid=worker.pid)

            self._node._reactor.addSystemEventTrigger(
                'before',
                'shutdown',
                self._cleanup_worker,
                self._node._reactor,
                worker,
            )

            # directory watcher
            #
            if 'watch' in options:

                if HAS_FSNOTIFY:

                    # assemble list of watched directories
                    watched_dirs = []
                    for d in options['watch'].get('directories', []):
                        watched_dirs.append(
                            os.path.abspath(os.path.join(self._node._cbdir,
                                                         d)))

                    worker.watch_timeout = options['watch'].get('timeout', 1)

                    # create a directory watcher
                    worker.watcher = DirWatcher(dirs=watched_dirs,
                                                notify_once=True)

                    # make sure to stop the background thread running inside the
                    # watcher upon Twisted being shut down
                    def on_shutdown():
                        worker.watcher.stop()

                    self._node._reactor.addSystemEventTrigger(
                        'before', 'shutdown', on_shutdown)

                    # this handler will get fired by the watcher upon detecting an FS event
                    def on_fsevent(evt):
                        worker.watcher.stop()
                        proto.signal('TERM')

                        if options['watch'].get('action', None) == 'restart':
                            self.log.info("Restarting guest ..")
                            # Add a timeout large enough (perhaps add a config option later)
                            self._node._reactor.callLater(
                                worker.watch_timeout, self.start_guest, id,
                                config, details)
                            # Shut the worker down, after the restart event is scheduled
                            worker.stop()

                    # now run the watcher on a background thread
                    deferToThread(worker.watcher.loop, on_fsevent)

                else:
                    self.log.warn(
                        "Warning: cannot watch directory for changes - feature DirWatcher unavailable"
                    )

            # assemble guest worker startup information
            #
            started_info = {
                'id': worker.id,
                'status': worker.status,
                'started': utcstr(worker.started),
                'who': worker.who
            }

            self.publish(started_topic,
                         started_info,
                         options=PublishOptions(exclude=[details.caller]))

            return started_info

        def on_ready_error(err):
            del self._workers[worker.id]

            emsg = 'Failed to start guest worker: {}'.format(err.value)
            self.log.error(emsg)
            raise ApplicationError(u"crossbar.error.cannot_start", emsg,
                                   ep.getlog())

        worker.ready.addCallbacks(on_ready_success, on_ready_error)

        def on_exit_success(res):
            self.log.info("Guest {id} exited with success", id=worker.id)
            del self._workers[worker.id]

        def on_exit_error(err):
            self.log.error("Guest {id} exited with error {err.value}",
                           id=worker.id,
                           err=err)
            del self._workers[worker.id]

        worker.exit.addCallbacks(on_exit_success, on_exit_error)

        # create a transport factory for talking WAMP to the native worker
        #
        transport_factory = create_guest_worker_client_factory(
            config, worker.ready, worker.exit)
        transport_factory.noisy = False
        self._workers[id].factory = transport_factory

        # now (immediately before actually forking) signal the starting of the worker
        #
        starting_info = {
            'id': id,
            'status': worker.status,
            'created': utcstr(worker.created),
            'who': worker.who
        }

        # the caller gets a progressive result ..
        if details.progress:
            details.progress(starting_info)

        # .. while all others get an event
        self.publish(starting_topic,
                     starting_info,
                     options=PublishOptions(exclude=[details.caller]))

        # now actually fork the worker ..
        #
        self.log.info("Starting {worker} with ID '{id}'...",
                      worker=worker_logname,
                      id=id)
        self.log.debug("{worker} '{id}' using command line '{cli}'...",
                       worker=worker_logname,
                       id=id,
                       cli=' '.join(args))

        d = ep.connect(transport_factory)

        def on_connect_success(proto):

            # this seems to be called immediately when the child process
            # has been forked. even if it then immediately fails because
            # e.g. the executable doesn't even exist. in other words,
            # I'm not sure under what conditions the deferred will
            # errback - probably only if the forking of a new process fails
            # at OS level due to out of memory conditions or such.

            pid = proto.transport.pid
            self.log.debug("Guest worker process connected with PID {pid}",
                           pid=pid)

            worker.pid = pid

            # proto is an instance of GuestWorkerClientProtocol
            worker.proto = proto

            worker.status = 'connected'
            worker.connected = datetime.utcnow()

        def on_connect_error(err):

            # not sure when this errback is triggered at all .. see above.
            self.log.error(
                "Internal error: connection to forked guest worker failed ({})"
                .format(err))

            # in any case, forward the error ..
            worker.ready.errback(err)

        d.addCallbacks(on_connect_success, on_connect_error)

        return worker.ready
コード例 #3
0
ファイル: process.py プロジェクト: biddyweb/crossbar
    def _start_native_worker(self, wtype, id, options=None, details=None):

        assert(wtype in ['router', 'container'])

        # prohibit starting a worker twice
        #
        if id in self._workers:
            emsg = "ERROR: could not start worker - a worker with ID '{}'' is already running (or starting)".format(id)
            log.msg(emsg)
            raise ApplicationError('crossbar.error.worker_already_running', emsg)

        # check worker options
        #
        options = options or {}
        try:
            if wtype == 'router':
                checkconfig.check_router_options(options)
            elif wtype == 'container':
                checkconfig.check_container_options(options)
            else:
                raise Exception("logic error")
        except Exception as e:
            emsg = "ERROR: could not start native worker - invalid configuration ({})".format(e)
            log.msg(emsg)
            raise ApplicationError('crossbar.error.invalid_configuration', emsg)

        # allow override Python executable from options
        #
        if 'python' in options:
            exe = options['python']

            # the executable must be an absolute path, e.g. /home/oberstet/pypy-2.2.1-linux64/bin/pypy
            #
            if not os.path.isabs(exe):
                emsg = "ERROR: python '{}' from worker options must be an absolute path".format(exe)
                log.msg(emsg)
                raise ApplicationError('crossbar.error.invalid_configuration', emsg)

            # of course the path must exist and actually be executable
            #
            if not (os.path.isfile(exe) and os.access(exe, os.X_OK)):
                emsg = "ERROR: python '{}' from worker options does not exist or isn't an executable".format(exe)
                log.msg(emsg)
                raise ApplicationError('crossbar.error.invalid_configuration', emsg)
        else:
            exe = sys.executable

        # all native workers (routers and containers for now) start from the same script
        #
        filename = pkg_resources.resource_filename('crossbar', 'worker/process.py')

        # assemble command line for forking the worker
        #
        args = [exe, "-u", filename]
        args.extend(["--cbdir", self._node._cbdir])
        args.extend(["--node", str(self._node_id)])
        args.extend(["--worker", str(id)])
        args.extend(["--realm", self._realm])
        args.extend(["--type", wtype])

        # allow override worker process title from options
        #
        if options.get('title', None):
            args.extend(['--title', options['title']])

        # allow overriding debug flag from options
        #
        if options.get('debug', self.debug):
            args.append('--debug')

        # forward explicit reactor selection
        #
        if 'reactor' in options and sys.platform in options['reactor']:
            args.extend(['--reactor', options['reactor'][sys.platform]])
        elif self._node.options.reactor:
            args.extend(['--reactor', self._node.options.reactor])

        # create worker process environment
        #
        worker_env = create_process_env(options)

        # log name of worker
        #
        worker_logname = {'router': 'Router', 'container': 'Container'}.get(wtype, 'Worker')

        # topic URIs used (later)
        #
        if wtype == 'router':
            starting_topic = 'crossbar.node.{}.on_router_starting'.format(self._node_id)
            started_topic = 'crossbar.node.{}.on_router_started'.format(self._node_id)
        elif wtype == 'container':
            starting_topic = 'crossbar.node.{}.on_container_starting'.format(self._node_id)
            started_topic = 'crossbar.node.{}.on_container_started'.format(self._node_id)
        else:
            raise Exception("logic error")

        # add worker tracking instance to the worker map ..
        #
        if wtype == 'router':
            worker = RouterWorkerProcess(self, id, details.caller, keeplog=options.get('traceback', None))
        elif wtype == 'container':
            worker = ContainerWorkerProcess(self, id, details.caller, keeplog=options.get('traceback', None))
        else:
            raise Exception("logic error")

        self._workers[id] = worker

        # create a (custom) process endpoint
        #
        ep = WorkerProcessEndpoint(self._node._reactor, exe, args, env=worker_env, worker=worker)

        # ready handling
        #
        def on_ready_success(id):
            log.msg("{} with ID '{}' and PID {} started".format(worker_logname, worker.id, worker.pid))

            def cleanup_worker():
                try:
                    worker.proto.transport.signalProcess('TERM')
                except ProcessExitedAlready:
                    pass  # ignore; it's already dead

            self._node._reactor.addSystemEventTrigger(
                'before', 'shutdown',
                cleanup_worker,
            )

            worker.status = 'started'
            worker.started = datetime.utcnow()

            started_info = {
                'id': worker.id,
                'status': worker.status,
                'started': utcstr(worker.started),
                'who': worker.who
            }

            self.publish(started_topic, started_info, options=PublishOptions(exclude=[details.caller]))

            return started_info

        def on_ready_error(err):
            del self._workers[worker.id]

            emsg = 'ERROR: failed to start native worker - {}'.format(err.value)
            log.msg(emsg)
            raise ApplicationError("crossbar.error.cannot_start", emsg, worker.getlog())

        worker.ready.addCallbacks(on_ready_success, on_ready_error)

        def on_exit_success(res):
            del self._workers[worker.id]

        def on_exit_error(err):
            del self._workers[worker.id]

        worker.exit.addCallbacks(on_exit_success, on_exit_error)

        # create a transport factory for talking WAMP to the native worker
        #
        transport_factory = create_native_worker_client_factory(self._node._router_session_factory, worker.ready, worker.exit)
        transport_factory.noisy = False
        self._workers[id].factory = transport_factory

        # now (immediately before actually forking) signal the starting of the worker
        #
        starting_info = {
            'id': id,
            'status': worker.status,
            'created': utcstr(worker.created),
            'who': worker.who
        }

        # the caller gets a progressive result ..
        if details.progress:
            details.progress(starting_info)

        # .. while all others get an event
        self.publish(starting_topic, starting_info, options=PublishOptions(exclude=[details.caller]))

        # now actually fork the worker ..
        #
        if self.debug:
            log.msg("Starting {} with ID '{}' using command line '{}' ..".format(worker_logname, id, ' '.join(args)))
        else:
            log.msg("Starting {} with ID '{}' ..".format(worker_logname, id))

        d = ep.connect(transport_factory)

        def on_connect_success(proto):

            # this seems to be called immediately when the child process
            # has been forked. even if it then immediately fails because
            # e.g. the executable doesn't even exist. in other words,
            # I'm not sure under what conditions the deferred will errback ..

            pid = proto.transport.pid
            if self.debug:
                log.msg("Native worker process connected with PID {}".format(pid))

            # note the PID of the worker
            worker.pid = pid

            # proto is an instance of NativeWorkerClientProtocol
            worker.proto = proto

            worker.status = 'connected'
            worker.connected = datetime.utcnow()

        def on_connect_error(err):

            # not sure when this errback is triggered at all ..
            if self.debug:
                log.msg("ERROR: Connecting forked native worker failed - {}".format(err))

            # in any case, forward the error ..
            worker.ready.errback(err)

        d.addCallbacks(on_connect_success, on_connect_error)

        return worker.ready
コード例 #4
0
ファイル: process.py プロジェクト: biddyweb/crossbar
    def start_guest(self, id, config, details=None):
        """
        Start a new guest process on this node.

        :param config: The guest process configuration.
        :type config: obj

        :returns: int -- The PID of the new process.
        """
        # prohibit starting a worker twice
        #
        if id in self._workers:
            emsg = "ERROR: could not start worker - a worker with ID '{}' is already running (or starting)".format(id)
            log.msg(emsg)
            raise ApplicationError('crossbar.error.worker_already_running', emsg)

        try:
            checkconfig.check_guest(config)
        except Exception as e:
            raise ApplicationError('crossbar.error.invalid_configuration', 'invalid guest worker configuration: {}'.format(e))

        options = config.get('options', {})

        # guest process working directory
        #
        workdir = self._node._cbdir
        if 'workdir' in options:
            workdir = os.path.join(workdir, options['workdir'])
        workdir = os.path.abspath(workdir)

        # guest process executable and command line arguments
        #

        # first try to configure the fully qualified path for the guest
        # executable by joining workdir and configured exectuable ..
        exe = os.path.abspath(os.path.join(workdir, config['executable']))

        if check_executable(exe):
            log.msg("Using guest worker executable '{}' (executable path taken from configuration)".format(exe))
        else:
            # try to detect the fully qualified path for the guest
            # executable by doing a "which" on the configured executable name
            exe = shutil.which(config['executable'])
            if exe is not None and check_executable(exe):
                log.msg("Using guest worker executable '{}' (executable path detected from environment)".format(exe))
            else:
                emsg = "ERROR: could not start worker - could not find and executable for '{}'".format(config['executable'])
                log.msg(emsg)
                raise ApplicationError('crossbar.error.invalid_configuration', emsg)

        # guest process command line arguments
        #
        args = [exe]
        args.extend(config.get('arguments', []))

        # guest process environment
        #
        worker_env = create_process_env(options)

        # log name of worker
        #
        worker_logname = 'Guest'

        # topic URIs used (later)
        #
        starting_topic = 'crossbar.node.{}.on_guest_starting'.format(self._node_id)
        started_topic = 'crossbar.node.{}.on_guest_started'.format(self._node_id)

        # add worker tracking instance to the worker map ..
        #
        worker = GuestWorkerProcess(self, id, details.caller, keeplog=options.get('traceback', None))

        self._workers[id] = worker

        # create a (custom) process endpoint
        #
        ep = WorkerProcessEndpoint(self._node._reactor, exe, args, path=workdir, env=worker_env, worker=worker)

        # ready handling
        #
        def on_ready_success(proto):

            worker.pid = proto.transport.pid
            worker.status = 'started'
            worker.started = datetime.utcnow()

            log.msg("{} with ID '{}' and PID {} started".format(worker_logname, worker.id, worker.pid))

            # directory watcher
            #
            if 'watch' in options:

                if HAS_FSNOTIFY:

                    # assemble list of watched directories
                    watched_dirs = []
                    for d in options['watch'].get('directories', []):
                        watched_dirs.append(os.path.abspath(os.path.join(self._node._cbdir, d)))

                    # create a directory watcher
                    worker.watcher = DirWatcher(dirs=watched_dirs, notify_once=True)

                    # make sure to stop the background thread running inside the
                    # watcher upon Twisted being shut down
                    def on_shutdown():
                        worker.watcher.stop()

                    reactor.addSystemEventTrigger('before', 'shutdown', on_shutdown)

                    # this handler will get fired by the watcher upon detecting an FS event
                    def on_fsevent(evt):
                        worker.watcher.stop()
                        proto.signal('TERM')

                        if options['watch'].get('action', None) == 'restart':
                            log.msg("Restarting guest ..")
                            reactor.callLater(0.1, self.start_guest, id, config, details)

                    # now run the watcher on a background thread
                    deferToThread(worker.watcher.loop, on_fsevent)

                else:
                    log.msg("Warning: cannot watch directory for changes - feature DirWatcher unavailable")

            # assemble guest worker startup information
            #
            started_info = {
                'id': worker.id,
                'status': worker.status,
                'started': utcstr(worker.started),
                'who': worker.who
            }

            self.publish(started_topic, started_info, options=PublishOptions(exclude=[details.caller]))

            return started_info

        def on_ready_error(err):
            del self._workers[worker.id]

            emsg = 'ERROR: failed to start guest worker - {}'.format(err.value)
            log.msg(emsg)
            raise ApplicationError("crossbar.error.cannot_start", emsg, ep.getlog())

        worker.ready.addCallbacks(on_ready_success, on_ready_error)

        def on_exit_success(res):
            log.msg("Guest excited with success")
            del self._workers[worker.id]

        def on_exit_error(err):
            log.msg("Guest excited with error", err)
            del self._workers[worker.id]

        worker.exit.addCallbacks(on_exit_success, on_exit_error)

        # create a transport factory for talking WAMP to the native worker
        #
        transport_factory = create_guest_worker_client_factory(config, worker.ready, worker.exit)
        transport_factory.noisy = False
        self._workers[id].factory = transport_factory

        # now (immediately before actually forking) signal the starting of the worker
        #
        starting_info = {
            'id': id,
            'status': worker.status,
            'created': utcstr(worker.created),
            'who': worker.who
        }

        # the caller gets a progressive result ..
        if details.progress:
            details.progress(starting_info)

        # .. while all others get an event
        self.publish(starting_topic, starting_info, options=PublishOptions(exclude=[details.caller]))

        # now actually fork the worker ..
        #
        if self.debug:
            log.msg("Starting {} with ID '{}' using command line '{}' ..".format(worker_logname, id, ' '.join(args)))
        else:
            log.msg("Starting {} with ID '{}' ..".format(worker_logname, id))

        d = ep.connect(transport_factory)

        def on_connect_success(proto):

            # this seems to be called immediately when the child process
            # has been forked. even if it then immediately fails because
            # e.g. the executable doesn't even exist. in other words,
            # I'm not sure under what conditions the deferred will
            # errback - probably only if the forking of a new process fails
            # at OS level due to out of memory conditions or such.

            pid = proto.transport.pid
            if self.debug:
                log.msg("Guest worker process connected with PID {}".format(pid))

            worker.pid = pid

            # proto is an instance of GuestWorkerClientProtocol
            worker.proto = proto

            worker.status = 'connected'
            worker.connected = datetime.utcnow()

        def on_connect_error(err):

            # not sure when this errback is triggered at all .. see above.
            if self.debug:
                log.msg("ERROR: Connecting forked guest worker failed - {}".format(err))

            # in any case, forward the error ..
            worker.ready.errback(err)

        d.addCallbacks(on_connect_success, on_connect_error)

        return worker.ready
コード例 #5
0
    def _start_guest_worker(self, worker_id, worker_config, details=None):
        """
        Start a new guest process on this node.

        :param config: The guest process configuration.
        :type config: obj

        :returns: int -- The PID of the new process.
        """
        # prohibit starting a worker twice
        #
        if worker_id in self._workers:
            emsg = "Could not start worker: a worker with ID '{}' is already running (or starting)".format(
                worker_id)
            self.log.error(emsg)
            raise ApplicationError(u'crossbar.error.worker_already_running',
                                   emsg)

        try:
            checkconfig.check_guest(worker_config)
        except Exception as e:
            raise ApplicationError(
                u'crossbar.error.invalid_configuration',
                'invalid guest worker configuration: {}'.format(e))

        options = worker_config.get('options', {})

        # guest process working directory
        #
        workdir = self._node._cbdir
        if 'workdir' in options:
            workdir = os.path.join(workdir, options['workdir'])
        workdir = os.path.abspath(workdir)

        # guest process executable and command line arguments
        #

        # first try to configure the fully qualified path for the guest
        # executable by joining workdir and configured exectuable ..
        exe = os.path.abspath(
            os.path.join(workdir, worker_config['executable']))

        if check_executable(exe):
            self.log.info(
                "Using guest worker executable '{exe}' (executable path taken from configuration)",
                exe=exe)
        else:
            # try to detect the fully qualified path for the guest
            # executable by doing a "which" on the configured executable name
            exe = which(worker_config['executable'])
            if exe is not None and check_executable(exe):
                self.log.info(
                    "Using guest worker executable '{exe}' (executable path detected from environment)",
                    exe=exe)
            else:
                emsg = "Could not start worker: could not find and executable for '{}'".format(
                    worker_config['executable'])
                self.log.error(emsg)
                raise ApplicationError(u'crossbar.error.invalid_configuration',
                                       emsg)

        # guest process command line arguments
        #
        args = [exe]
        args.extend(worker_config.get('arguments', []))

        # guest process environment
        #
        worker_env = create_process_env(options)

        # log name of worker
        #
        worker_logname = 'Guest'

        # topic URIs used (later)
        #
        starting_topic = u'{}.on_guest_starting'.format(self._uri_prefix)
        started_topic = u'{}.on_guest_started'.format(self._uri_prefix)

        # add worker tracking instance to the worker map ..
        #
        worker = GuestWorkerProcess(self,
                                    worker_id,
                                    details.caller,
                                    keeplog=options.get('traceback', None))

        self._workers[worker_id] = worker

        # create a (custom) process endpoint
        #
        ep = WorkerProcessEndpoint(self._node._reactor,
                                   exe,
                                   args,
                                   path=workdir,
                                   env=worker_env,
                                   worker=worker)

        # ready handling
        #
        def on_ready_success(proto):

            self.log.info('{worker_logname} worker "{worker_id}" started',
                          worker_logname=worker_logname,
                          worker_id=worker.id)

            worker.on_worker_started(proto)

            self._node._reactor.addSystemEventTrigger(
                'before',
                'shutdown',
                self._cleanup_worker,
                self._node._reactor,
                worker,
            )

            # directory watcher
            #
            if 'watch' in options:

                if HAS_FS_WATCHER:

                    # assemble list of watched directories
                    watched_dirs = []
                    for d in options['watch'].get('directories', []):
                        watched_dirs.append(
                            os.path.abspath(os.path.join(self._node._cbdir,
                                                         d)))

                    worker.watch_timeout = options['watch'].get('timeout', 1)

                    # create a filesystem watcher
                    worker.watcher = FilesystemWatcher(
                        workdir, watched_dirs=watched_dirs)

                    # make sure to stop the watch upon Twisted being shut down
                    def on_shutdown():
                        worker.watcher.stop()

                    self._node._reactor.addSystemEventTrigger(
                        'before', 'shutdown', on_shutdown)

                    # this handler will get fired by the watcher upon detecting an FS event
                    def on_filesystem_change(fs_event):
                        worker.watcher.stop()
                        proto.signal('TERM')

                        if options['watch'].get('action', None) == 'restart':
                            self.log.info(
                                "Filesystem watcher detected change {fs_event} - restarting guest in {watch_timeout} seconds ..",
                                fs_event=fs_event,
                                watch_timeout=worker.watch_timeout)
                            # Add a timeout large enough (perhaps add a config option later)
                            self._node._reactor.callLater(
                                worker.watch_timeout, self.start_guest,
                                worker_id, worker_config, details)
                            # Shut the worker down, after the restart event is scheduled
                            # FIXME: all workers should have a stop() method ..
                            # -> 'GuestWorkerProcess' object has no attribute 'stop'
                            # worker.stop()
                        else:
                            self.log.info(
                                "Filesystem watcher detected change {fs_event} - no action taken!",
                                fs_event=fs_event)

                    # now start watching ..
                    worker.watcher.start(on_filesystem_change)

                else:
                    self.log.warn(
                        "Cannot watch directories for changes - feature not available"
                    )

            # assemble guest worker startup information
            #
            started_info = {
                u'id': worker.id,
                u'status': worker.status,
                u'started': utcstr(worker.started),
                u'who': worker.who,
            }

            self.publish(started_topic,
                         started_info,
                         options=PublishOptions(exclude=details.caller))

            return started_info

        def on_ready_error(err):
            del self._workers[worker.id]

            emsg = 'Failed to start guest worker: {}'.format(err.value)
            self.log.error(emsg)
            raise ApplicationError(u"crossbar.error.cannot_start", emsg,
                                   ep.getlog())

        worker.ready.addCallbacks(on_ready_success, on_ready_error)

        def on_exit_success(res):
            self.log.info("Guest {worker_id} exited with success",
                          worker_id=worker.id)
            del self._workers[worker.id]

        def on_exit_error(err):
            self.log.error("Guest {worker_id} exited with error {err.value}",
                           worker_id=worker.id,
                           err=err)
            del self._workers[worker.id]

        worker.exit.addCallbacks(on_exit_success, on_exit_error)

        # create a transport factory for talking WAMP to the native worker
        #
        transport_factory = create_guest_worker_client_factory(
            worker_config, worker.ready, worker.exit)
        transport_factory.noisy = False
        self._workers[worker_id].factory = transport_factory

        # now (immediately before actually forking) signal the starting of the worker
        #
        starting_info = {
            u'id': worker_id,
            u'status': worker.status,
            u'created': utcstr(worker.created),
            u'who': worker.who,
        }

        # the caller gets a progressive result ..
        if details.progress:
            details.progress(starting_info)

        # .. while all others get an event
        self.publish(starting_topic,
                     starting_info,
                     options=PublishOptions(exclude=details.caller))

        # now actually fork the worker ..
        #
        self.log.info('{worker_logname} "{worker_id}" process starting ..',
                      worker_logname=worker_logname,
                      worker_id=worker_id)
        self.log.debug(
            '{worker_logname} "{worker_id}" process using command line "{cli}" ..',
            worker_logname=worker_logname,
            worker_id=worker_id,
            cli=' '.join(args))

        d = ep.connect(transport_factory)

        def on_connect_success(proto):

            # this seems to be called immediately when the child process
            # has been forked. even if it then immediately fails because
            # e.g. the executable doesn't even exist. in other words,
            # I'm not sure under what conditions the deferred will
            # errback - probably only if the forking of a new process fails
            # at OS level due to out of memory conditions or such.
            self.log.debug('{worker_logname} "{worker_id}" connected',
                           worker_logname=worker_logname,
                           worker_id=worker_id)

            # do not comment this: it will lead to on_worker_started being called
            # _before_ on_worker_connected, and we don't need it!
            # worker.on_worker_connected(proto)

        def on_connect_error(err):

            # not sure when this errback is triggered at all .. see above.
            self.log.failure(
                "Internal error: connection to forked guest worker failed ({log_failure.value})",
            )

            # in any case, forward the error ..
            worker.ready.errback(err)

        d.addCallbacks(on_connect_success, on_connect_error)

        return worker.ready
コード例 #6
0
    def _start_native_worker(self, wtype, id, options=None, details=None):

        assert (wtype in ['router', 'container'])

        ## prohibit starting a worker twice
        ##
        if id in self._workers:
            emsg = "ERROR: could not start worker - a worker with ID '{}'' is already running (or starting)".format(
                id)
            log.msg(emsg)
            raise ApplicationError('crossbar.error.worker_already_running',
                                   emsg)

        ## check worker options
        ##
        options = options or {}
        try:
            if wtype == 'router':
                checkconfig.check_router_options(options)
            elif wtype == 'container':
                checkconfig.check_container_options(options)
            else:
                raise Exception("logic error")
        except Exception as e:
            emsg = "ERROR: could not start native worker - invalid configuration ({})".format(
                e)
            log.msg(emsg)
            raise ApplicationError('crossbar.error.invalid_configuration',
                                   emsg)

        ## allow override Python executable from options
        ##
        if 'python' in options:
            exe = options['python']

            ## the executable must be an absolute path, e.g. /home/oberstet/pypy-2.2.1-linux64/bin/pypy
            ##
            if not os.path.isabs(exe):
                emsg = "ERROR: python '{}' from worker options must be an absolute path".format(
                    exe)
                log.msg(emsg)
                raise ApplicationError('crossbar.error.invalid_configuration',
                                       emsg)

            ## of course the path must exist and actually be executable
            ##
            if not (os.path.isfile(exe) and os.access(exe, os.X_OK)):
                emsg = "ERROR: python '{}' from worker options does not exist or isn't an executable".format(
                    exe)
                log.msg(emsg)
                raise ApplicationError('crossbar.error.invalid_configuration',
                                       emsg)
        else:
            exe = sys.executable

        ## all native workers (routers and containers for now) start from the same script
        ##
        filename = pkg_resources.resource_filename('crossbar',
                                                   'worker/process.py')

        ## assemble command line for forking the worker
        ##
        args = [exe, "-u", filename]
        args.extend(["--cbdir", self._node._cbdir])
        args.extend(["--node", str(self._node_id)])
        args.extend(["--worker", str(id)])
        args.extend(["--realm", self._realm])
        args.extend(["--type", wtype])

        ## allow override worker process title from options
        ##
        if options.get('title', None):
            args.extend(['--title', options['title']])

        ## allow overriding debug flag from options
        ##
        if options.get('debug', self.debug):
            args.append('--debug')

        ## forward explicit reactor selection
        ##
        if 'reactor' in options and sys.platform in options['reactor']:
            args.extend(['--reactor', options['reactor'][sys.platform]])
        elif self._node.options.reactor:
            args.extend(['--reactor', self._node.options.reactor])

        ## create worker process environment
        ##
        worker_env = create_process_env(options)

        ## log name of worker
        ##
        worker_logname = {
            'router': 'Router',
            'container': 'Container'
        }.get(wtype, 'Worker')

        ## topic URIs used (later)
        ##
        if wtype == 'router':
            starting_topic = 'crossbar.node.{}.on_router_starting'.format(
                self._node_id)
            started_topic = 'crossbar.node.{}.on_router_started'.format(
                self._node_id)
        elif wtype == 'container':
            starting_topic = 'crossbar.node.{}.on_container_starting'.format(
                self._node_id)
            started_topic = 'crossbar.node.{}.on_container_started'.format(
                self._node_id)
        else:
            raise Exception("logic error")

        ## add worker tracking instance to the worker map ..
        ##
        if wtype == 'router':
            worker = RouterWorkerProcess(self,
                                         id,
                                         details.authid,
                                         keeplog=options.get(
                                             'traceback', None))
        elif wtype == 'container':
            worker = ContainerWorkerProcess(self,
                                            id,
                                            details.authid,
                                            keeplog=options.get(
                                                'traceback', None))
        else:
            raise Exception("logic error")

        self._workers[id] = worker

        ## create a (custom) process endpoint
        ##
        ep = WorkerProcessEndpoint(self._node._reactor,
                                   exe,
                                   args,
                                   env=worker_env,
                                   worker=worker)

        ## ready handling
        ##
        def on_ready_success(id):
            log.msg("{} with ID '{}' and PID {} started".format(
                worker_logname, worker.id, worker.pid))

            worker.status = 'started'
            worker.started = datetime.utcnow()

            started_info = {
                'id': worker.id,
                'status': worker.status,
                'started': utcstr(worker.started),
                'who': worker.who
            }

            self.publish(started_topic,
                         started_info,
                         options=PublishOptions(exclude=[details.caller]))

            return started_info

        def on_ready_error(err):
            del self._workers[worker.id]

            emsg = 'ERROR: failed to start native worker - {}'.format(
                err.value)
            log.msg(emsg)
            raise ApplicationError("crossbar.error.cannot_start", emsg,
                                   worker.getlog())

        worker.ready.addCallbacks(on_ready_success, on_ready_error)

        def on_exit_success(res):
            del self._workers[worker.id]

        def on_exit_error(err):
            del self._workers[worker.id]

        worker.exit.addCallbacks(on_exit_success, on_exit_error)

        ## create a transport factory for talking WAMP to the native worker
        ##
        transport_factory = create_native_worker_client_factory(
            self._node._router_session_factory, worker.ready, worker.exit)
        transport_factory.noisy = False
        self._workers[id].factory = transport_factory

        ## now (immediately before actually forking) signal the starting of the worker
        ##
        starting_info = {
            'id': id,
            'status': worker.status,
            'created': utcstr(worker.created),
            'who': worker.who
        }

        ## the caller gets a progressive result ..
        if details.progress:
            details.progress(starting_info)

        ## .. while all others get an event
        self.publish(starting_topic,
                     starting_info,
                     options=PublishOptions(exclude=[details.caller]))

        ## now actually fork the worker ..
        ##
        if self.debug:
            log.msg(
                "Starting {} with ID '{}' using command line '{}' ..".format(
                    worker_logname, id, ' '.join(args)))
        else:
            log.msg("Starting {} with ID '{}' ..".format(worker_logname, id))

        d = ep.connect(transport_factory)

        def on_connect_success(proto):

            ## this seems to be called immediately when the child process
            ## has been forked. even if it then immediately fails because
            ## e.g. the executable doesn't even exist. in other words,
            ## I'm not sure under what conditions the deferred will errback ..

            pid = proto.transport.pid
            if self.debug:
                log.msg(
                    "Native worker process connected with PID {}".format(pid))

            ## note the PID of the worker
            worker.pid = pid

            ## proto is an instance of NativeWorkerClientProtocol
            worker.proto = proto

            worker.status = 'connected'
            worker.connected = datetime.utcnow()

        def on_connect_error(err):

            ## not sure when this errback is triggered at all ..
            if self.debug:
                log.msg("ERROR: Connecting forked native worker failed - {}".
                        format(err))

            ## in any case, forward the error ..
            worker.ready.errback(err)

        d.addCallbacks(on_connect_success, on_connect_error)

        return worker.ready
コード例 #7
0
    def _start_native_worker(self,
                             worker_type,
                             worker_id,
                             worker_options=None,
                             details=None):

        # prohibit starting a worker twice
        #
        if worker_id in self._workers:
            emsg = "Could not start worker: a worker with ID '{}' is already running (or starting)".format(
                worker_id)
            self.log.error(emsg)
            raise ApplicationError(u'crossbar.error.worker_already_running',
                                   emsg)

        # check worker options
        #
        options = worker_options or {}
        try:
            if worker_type in self._node._native_workers:
                if self._node._native_workers[worker_type][
                        'checkconfig_options']:
                    self._node._native_workers[worker_type][
                        'checkconfig_options'](options)
                else:
                    raise Exception(
                        'No checkconfig_options for worker type "{worker_type}" implemented!'
                        .format(worker_type=worker_type))
            else:
                raise Exception('invalid worker type "{}"'.format(worker_type))
        except Exception as e:
            emsg = "Could not start native worker: invalid configuration ({})".format(
                e)
            self.log.error(emsg)
            raise ApplicationError(u'crossbar.error.invalid_configuration',
                                   emsg)

        # the fully qualified worker class as a string
        worker_class = qual(
            self._node._native_workers[worker_type]['worker_class'])

        # allow override Python executable from options
        #
        if 'python' in options:
            exe = options['python']

            # the executable must be an absolute path, e.g. /home/oberstet/pypy-2.2.1-linux64/bin/pypy
            #
            if not os.path.isabs(exe):
                emsg = "Invalid worker configuration: python executable '{}' must be an absolute path".format(
                    exe)
                self.log.error(emsg)
                raise ApplicationError(u'crossbar.error.invalid_configuration',
                                       emsg)

            # of course the path must exist and actually be executable
            #
            if not (os.path.isfile(exe) and os.access(exe, os.X_OK)):
                emsg = "Invalid worker configuration: python executable '{}' does not exist or isn't an executable".format(
                    exe)
                self.log.error(emsg)
                raise ApplicationError(u'crossbar.error.invalid_configuration',
                                       emsg)
        else:
            exe = sys.executable

        # allow override default Python module search paths from options
        #
        if 'pythonpath' in options:
            pythonpaths_to_add = [
                os.path.abspath(os.path.join(self._node._cbdir, p))
                for p in options.get('pythonpath', [])
            ]
        else:
            pythonpaths_to_add = []

        # assemble command line for forking the worker
        #
        # all native workers (routers and containers for now) start
        # from the same script in crossbar/worker/process.py -- we're
        # invoking via "-m" so that .pyc files, __pycache__ etc work
        # properly.
        #
        args = [exe, "-u", "-m", "crossbar.worker.process"]
        args.extend(["--cbdir", self._node._cbdir])
        args.extend(["--worker", str(worker_id)])
        args.extend(["--realm", self._realm])
        args.extend(["--klass", worker_class])
        args.extend(["--loglevel", get_global_log_level()])
        if "shutdown" in options:
            args.extend(["--shutdown", options["shutdown"]])

        # Node-level callback to inject worker arguments
        #
        self._node._extend_worker_args(args, options)

        # allow override worker process title from options
        #
        if options.get('title', None):
            args.extend(['--title', options['title']])

        # forward explicit reactor selection
        #
        if 'reactor' in options and sys.platform in options['reactor']:
            args.extend(['--reactor', options['reactor'][sys.platform]])
        # FIXME
        # elif self._node.options.reactor:
        #    args.extend(['--reactor', self._node.options.reactor])

        # create worker process environment
        #
        worker_env = create_process_env(options)

        # We need to use the same PYTHONPATH we were started with, so we can
        # find the Crossbar we're working with -- it may not be the same as the
        # one on the default path
        worker_env["PYTHONPATH"] = os.pathsep.join(pythonpaths_to_add +
                                                   sys.path)

        # log name of worker
        #
        worker_logname = self._node._native_workers[worker_type]['logname']

        # each worker is run under its own dedicated WAMP auth role
        #
        worker_auth_role = u'crossbar.worker.{}'.format(worker_id)

        # topic URIs used (later)
        #
        starting_topic = self._node._native_workers[worker_type]['topics'][
            'starting']
        started_topic = self._node._native_workers[worker_type]['topics'][
            'started']

        # add worker tracking instance to the worker map ..
        #
        WORKER = self._node._native_workers[worker_type]['class']
        worker = WORKER(self,
                        worker_id,
                        details.caller,
                        keeplog=options.get('traceback', None))
        self._workers[worker_id] = worker

        # create a (custom) process endpoint.
        #
        if platform.isWindows():
            childFDs = None  # Use the default Twisted ones
        else:
            # The communication between controller and container workers is
            # using WAMP running over 2 pipes.
            # For controller->container traffic this runs over FD 0 (`stdin`)
            # and for the container->controller traffic, this runs over FD 3.
            #
            # Note: We use FD 3, not FD 1 (`stdout`) or FD 2 (`stderr`) for
            # container->controller traffic, so that components running in the
            # container which happen to write to `stdout` or `stderr` do not
            # interfere with the container-controller communication.
            childFDs = {0: "w", 1: "r", 2: "r", 3: "r"}

        ep = WorkerProcessEndpoint(self._node._reactor,
                                   exe,
                                   args,
                                   env=worker_env,
                                   worker=worker,
                                   childFDs=childFDs)

        # ready handling
        #
        def on_ready_success(worker_id):
            self.log.info(
                '{worker_type} worker "{worker_id}" process {pid} started',
                worker_type=worker_logname,
                worker_id=worker.id,
                pid=worker.pid)

            self._node._reactor.addSystemEventTrigger(
                'before',
                'shutdown',
                self._cleanup_worker,
                self._node._reactor,
                worker,
            )

            worker.on_worker_started()

            started_info = {
                u'id': worker.id,
                u'status': worker.status,
                u'started': utcstr(worker.started),
                u'who': worker.who,
            }

            # FIXME: make start of stats printer dependent on log level ..
            if False:
                worker.log_stats(5.)

            self.publish(started_topic,
                         started_info,
                         options=PublishOptions(exclude=details.caller))

            return started_info

        def on_ready_error(err):
            del self._workers[worker.id]
            emsg = 'Failed to start native worker: {}'.format(err.value)
            self.log.error(emsg)
            raise ApplicationError(u"crossbar.error.cannot_start", emsg,
                                   worker.getlog())

        worker.ready.addCallbacks(on_ready_success, on_ready_error)

        def on_exit_success(_):
            self.log.info("Node worker {worker.id} ended successfully",
                          worker=worker)

            # clear worker log
            worker.log_stats(0)

            # remove the dedicated node router authrole we dynamically
            # added for the worker
            self._node._drop_worker_role(worker_auth_role)

            # remove our metadata tracking for the worker
            del self._workers[worker.id]

            # indicate that the worker excited successfully
            return True

        def on_exit_error(err):
            self.log.info("Node worker {worker.id} ended with error ({err})",
                          worker=worker,
                          err=err)

            # clear worker log
            worker.log_stats(0)

            # remove the dedicated node router authrole we dynamically
            # added for the worker
            self._node._drop_worker_role(worker_auth_role)

            # remove our metadata tracking for the worker
            del self._workers[worker.id]

            # indicate that the worker excited with error
            return False

        def check_for_shutdown(was_successful):
            self.log.info(
                'Checking for node shutdown: worker_exit_success={worker_exit_success}, shutdown_requested={shutdown_requested}, node_shutdown_triggers={node_shutdown_triggers}',
                worker_exit_success=was_successful,
                shutdown_requested=self._shutdown_requested,
                node_shutdown_triggers=self._node._node_shutdown_triggers)

            shutdown = self._shutdown_requested

            # automatically shutdown node whenever a worker ended (successfully, or with error)
            #
            if checkconfig.NODE_SHUTDOWN_ON_WORKER_EXIT in self._node._node_shutdown_triggers:
                self.log.info(
                    "Node worker ended, and trigger '{trigger}' active",
                    trigger=checkconfig.NODE_SHUTDOWN_ON_WORKER_EXIT)
                shutdown = True

            # automatically shutdown node when worker ended with error
            #
            if not was_successful and checkconfig.NODE_SHUTDOWN_ON_WORKER_EXIT_WITH_ERROR in self._node._node_shutdown_triggers:
                self.log.info(
                    "Node worker ended with error, and trigger '{trigger}' active",
                    trigger=checkconfig.NODE_SHUTDOWN_ON_WORKER_EXIT_WITH_ERROR
                )
                shutdown = True

            # automatically shutdown node when no more workers are left
            #
            if len(
                    self._workers
            ) == 0 and checkconfig.NODE_SHUTDOWN_ON_LAST_WORKER_EXIT in self._node._node_shutdown_triggers:
                self.log.info(
                    "No more node workers running, and trigger '{trigger}' active",
                    trigger=checkconfig.NODE_SHUTDOWN_ON_LAST_WORKER_EXIT)
                shutdown = True

            # initiate shutdown (but only if we are not already shutting down)
            #
            if shutdown:
                self.shutdown()
            else:
                self.log.info('Node will continue to run!')

        d_on_exit = worker.exit.addCallbacks(on_exit_success, on_exit_error)
        d_on_exit.addBoth(check_for_shutdown)

        # create a transport factory for talking WAMP to the native worker
        #
        transport_factory = create_native_worker_client_factory(
            self._node._router_session_factory, worker_auth_role, worker.ready,
            worker.exit)
        transport_factory.noisy = False
        self._workers[worker_id].factory = transport_factory

        # now (immediately before actually forking) signal the starting of the worker
        #
        starting_info = {
            u'id': worker_id,
            u'status': worker.status,
            u'created': utcstr(worker.created),
            u'who': worker.who,
        }

        # the caller gets a progressive result ..
        if details.progress:
            details.progress(starting_info)

        # .. while all others get an event
        self.publish(starting_topic,
                     starting_info,
                     options=PublishOptions(exclude=details.caller))

        # now actually fork the worker ..
        #
        self.log.info('{worker_logname} worker "{worker_id}" starting ..',
                      worker_logname=worker_logname,
                      worker_id=worker_id)
        self.log.debug(
            '{worker_logname} "{worker_id}" command line is "{cmdline}"',
            worker_logname=worker_logname,
            worker_id=worker_id,
            cmdline=' '.join(args))

        d = ep.connect(transport_factory)

        def on_connect_success(proto):

            # this seems to be called immediately when the child process
            # has been forked. even if it then immediately fails because
            # e.g. the executable doesn't even exist. in other words,
            # I'm not sure under what conditions the deferred will errback ..

            self.log.debug('Native worker "{worker_id}" connected',
                           worker_id=worker_id)

            worker.on_worker_connected(proto)

            # dynamically add a dedicated authrole to the router
            # for the worker we've just started
            self._node._add_worker_role(worker_auth_role, options)

        def on_connect_error(err):

            # not sure when this errback is triggered at all ..
            self.log.error(
                "Interal error: connection to forked native worker failed ({err})",
                err=err)

            # in any case, forward the error ..
            worker.ready.errback(err)

        d.addCallbacks(on_connect_success, on_connect_error)

        return worker.ready
コード例 #8
0
ファイル: process.py プロジェクト: v09-software/crossbar
    def _start_native_worker(self, wtype, id, options=None, details=None):

        assert(wtype in ['router', 'container', 'websocket-testee'])

        # prohibit starting a worker twice
        #
        if id in self._workers:
            emsg = "Could not start worker: a worker with ID '{}' is already running (or starting)".format(id)
            self.log.error(emsg)
            raise ApplicationError(u'crossbar.error.worker_already_running', emsg)

        # check worker options
        #
        options = options or {}
        try:
            if wtype == 'router':
                checkconfig.check_router_options(options)
            elif wtype == 'container':
                checkconfig.check_container_options(options)
            elif wtype == 'websocket-testee':
                checkconfig.check_websocket_testee_options(options)
            else:
                raise Exception("logic error")
        except Exception as e:
            emsg = "Could not start native worker: invalid configuration ({})".format(e)
            self.log.error(emsg)
            raise ApplicationError(u'crossbar.error.invalid_configuration', emsg)

        # allow override Python executable from options
        #
        if 'python' in options:
            exe = options['python']

            # the executable must be an absolute path, e.g. /home/oberstet/pypy-2.2.1-linux64/bin/pypy
            #
            if not os.path.isabs(exe):
                emsg = "Invalid worker configuration: python executable '{}' must be an absolute path".format(exe)
                self.log.error(emsg)
                raise ApplicationError(u'crossbar.error.invalid_configuration', emsg)

            # of course the path must exist and actually be executable
            #
            if not (os.path.isfile(exe) and os.access(exe, os.X_OK)):
                emsg = "Invalid worker configuration: python executable '{}' does not exist or isn't an executable".format(exe)
                self.log.error(emsg)
                raise ApplicationError(u'crossbar.error.invalid_configuration', emsg)
        else:
            exe = sys.executable

        # all native workers (routers and containers for now) start from the same script
        #
        filename = FilePath(crossbar.__file__).parent().child("worker").child("process.py").path

        # assemble command line for forking the worker
        #
        args = [exe, "-u", filename]
        args.extend(["--cbdir", self._node._cbdir])
        args.extend(["--node", str(self._node_id)])
        args.extend(["--worker", str(id)])
        args.extend(["--realm", self._realm])
        args.extend(["--type", wtype])
        args.extend(["--loglevel", _loglevel])

        # allow override worker process title from options
        #
        if options.get('title', None):
            args.extend(['--title', options['title']])

        # forward explicit reactor selection
        #
        if 'reactor' in options and sys.platform in options['reactor']:
            args.extend(['--reactor', options['reactor'][sys.platform]])
        elif self._node.options.reactor:
            args.extend(['--reactor', self._node.options.reactor])

        # create worker process environment
        #
        worker_env = create_process_env(options)

        # We need to use the same PYTHONPATH we were started with, so we can
        # find the Crossbar we're working with -- it may not be the same as the
        # one on the default path
        worker_env["PYTHONPATH"] = os.pathsep.join(sys.path)

        # log name of worker
        #
        worker_logname = {
            'router': 'Router',
            'container': 'Container',
            'websocket-testee': 'WebSocketTestee'
        }.get(wtype, 'Worker')

        # topic URIs used (later)
        #
        if wtype == 'router':
            starting_topic = 'crossbar.node.{}.on_router_starting'.format(self._node_id)
            started_topic = 'crossbar.node.{}.on_router_started'.format(self._node_id)
        elif wtype == 'container':
            starting_topic = 'crossbar.node.{}.on_container_starting'.format(self._node_id)
            started_topic = 'crossbar.node.{}.on_container_started'.format(self._node_id)
        elif wtype == 'websocket-testee':
            starting_topic = 'crossbar.node.{}.on_websocket_testee_starting'.format(self._node_id)
            started_topic = 'crossbar.node.{}.on_websocket_testee_started'.format(self._node_id)
        else:
            raise Exception("logic error")

        # add worker tracking instance to the worker map ..
        #
        if wtype == 'router':
            worker = RouterWorkerProcess(self, id, details.caller, keeplog=options.get('traceback', None))
        elif wtype == 'container':
            worker = ContainerWorkerProcess(self, id, details.caller, keeplog=options.get('traceback', None))
        elif wtype == 'websocket-testee':
            worker = WebSocketTesteeWorkerProcess(self, id, details.caller, keeplog=options.get('traceback', None))
        else:
            raise Exception("logic error")

        self._workers[id] = worker

        # create a (custom) process endpoint.
        #
        if platform.isWindows():
            childFDs = None  # Use the default Twisted ones
        else:
            # The communication between controller and container workers is
            # using WAMP running over 2 pipes.
            # For controller->container traffic this runs over FD 0 (`stdin`)
            # and for the container->controller traffic, this runs over FD 3.
            #
            # Note: We use FD 3, not FD 1 (`stdout`) or FD 2 (`stderr`) for
            # container->controller traffic, so that components running in the
            # container which happen to write to `stdout` or `stderr` do not
            # interfere with the container-controller communication.
            childFDs = {0: "w", 1: "r", 2: "r", 3: "r"}

        ep = WorkerProcessEndpoint(
            self._node._reactor, exe, args, env=worker_env, worker=worker,
            childFDs=childFDs)

        # ready handling
        #
        def on_ready_success(id):
            self.log.info("{worker} with ID '{id}' and PID {pid} started",
                          worker=worker_logname, id=worker.id, pid=worker.pid)

            self._node._reactor.addSystemEventTrigger(
                'before', 'shutdown',
                self._cleanup_worker, self._node._reactor, worker,
            )

            worker.status = 'started'
            worker.started = datetime.utcnow()

            started_info = {
                'id': worker.id,
                'status': worker.status,
                'started': utcstr(worker.started),
                'who': worker.who
            }

            # FIXME: make start of stats printer dependent on log level ..
            worker.log_stats(5.)

            self.publish(started_topic, started_info, options=PublishOptions(exclude=[details.caller]))

            return started_info

        def on_ready_error(err):
            del self._workers[worker.id]
            emsg = 'Failed to start native worker: {}'.format(err.value)
            self.log.error(emsg)
            raise ApplicationError(u"crossbar.error.cannot_start", emsg, worker.getlog())

        worker.ready.addCallbacks(on_ready_success, on_ready_error)

        def on_exit_success(res):
            worker.log_stats(0)
            del self._workers[worker.id]
            return worker.id

        def on_exit_error(err):
            worker.log_stats(0)
            del self._workers[worker.id]
            return worker.id

        def check_for_shutdown(worker_id):
            shutdown = True
            if not self._workers:
                shutdown = True

            self.log.info("Node worker {} ended ({} workers left)".format(worker_id, len(self._workers)))

            if shutdown:
                if not self._shutdown_requested:
                    self.log.info("Node shutting down ..")
                    self._shutdown_requested = True
                    self.shutdown()
                else:
                    # shutdown already initiated
                    pass

        d_on_exit = worker.exit.addCallbacks(on_exit_success, on_exit_error)
        d_on_exit.addBoth(check_for_shutdown)

        # create a transport factory for talking WAMP to the native worker
        #
        transport_factory = create_native_worker_client_factory(self._node._router_session_factory, worker.ready, worker.exit)
        transport_factory.noisy = False
        self._workers[id].factory = transport_factory

        # now (immediately before actually forking) signal the starting of the worker
        #
        starting_info = {
            'id': id,
            'status': worker.status,
            'created': utcstr(worker.created),
            'who': worker.who
        }

        # the caller gets a progressive result ..
        if details.progress:
            details.progress(starting_info)

        # .. while all others get an event
        self.publish(starting_topic, starting_info, options=PublishOptions(exclude=[details.caller]))

        # now actually fork the worker ..
        #
        self.log.info("Starting {worker} with ID '{id}'...",
                      worker=worker_logname, id=id)
        self.log.debug("{worker} '{id}' command line is '{cmdline}'",
                       worker=worker_logname, id=id, cmdline=' '.join(args))

        d = ep.connect(transport_factory)

        def on_connect_success(proto):

            # this seems to be called immediately when the child process
            # has been forked. even if it then immediately fails because
            # e.g. the executable doesn't even exist. in other words,
            # I'm not sure under what conditions the deferred will errback ..

            pid = proto.transport.pid
            self.log.debug("Native worker process connected with PID {pid}",
                           pid=pid)

            # note the PID of the worker
            worker.pid = pid

            # proto is an instance of NativeWorkerClientProtocol
            worker.proto = proto

            worker.status = 'connected'
            worker.connected = datetime.utcnow()

        def on_connect_error(err):

            # not sure when this errback is triggered at all ..
            self.log.error("Interal error: connection to forked native worker failed ({err})", err=err)

            # in any case, forward the error ..
            worker.ready.errback(err)

        d.addCallbacks(on_connect_success, on_connect_error)

        return worker.ready
コード例 #9
0
ファイル: process.py プロジェクト: FirefighterBlu3/crossbar
    def start_guest(self, id, config, details=None):
        """
        Start a new guest process on this node.

        :param config: The guest process configuration.
        :type config: obj

        :returns: int -- The PID of the new process.
        """
        # prohibit starting a worker twice
        #
        if id in self._workers:
            emsg = "Could not start worker: a worker with ID '{}' is already running (or starting)".format(id)
            self.log.error(emsg)
            raise ApplicationError(u'crossbar.error.worker_already_running', emsg)

        try:
            checkconfig.check_guest(config)
        except Exception as e:
            raise ApplicationError(u'crossbar.error.invalid_configuration', 'invalid guest worker configuration: {}'.format(e))

        options = config.get('options', {})

        # guest process working directory
        #
        workdir = self._node._cbdir
        if 'workdir' in options:
            workdir = os.path.join(workdir, options['workdir'])
        workdir = os.path.abspath(workdir)

        # guest process executable and command line arguments
        #

        # first try to configure the fully qualified path for the guest
        # executable by joining workdir and configured exectuable ..
        exe = os.path.abspath(os.path.join(workdir, config['executable']))

        if check_executable(exe):
            self.log.info("Using guest worker executable '{exe}' (executable path taken from configuration)",
                          exe=exe)
        else:
            # try to detect the fully qualified path for the guest
            # executable by doing a "which" on the configured executable name
            exe = shutil.which(config['executable'])
            if exe is not None and check_executable(exe):
                self.log.info("Using guest worker executable '{exe}' (executable path detected from environment)",
                              exe=exe)
            else:
                emsg = "Could not start worker: could not find and executable for '{}'".format(config['executable'])
                self.log.error(emsg)
                raise ApplicationError(u'crossbar.error.invalid_configuration', emsg)

        # guest process command line arguments
        #
        args = [exe]
        args.extend(config.get('arguments', []))

        # guest process environment
        #
        worker_env = create_process_env(options)

        # log name of worker
        #
        worker_logname = 'Guest'

        # topic URIs used (later)
        #
        starting_topic = 'crossbar.node.{}.on_guest_starting'.format(self._node_id)
        started_topic = 'crossbar.node.{}.on_guest_started'.format(self._node_id)

        # add worker tracking instance to the worker map ..
        #
        worker = GuestWorkerProcess(self, id, details.caller, keeplog=options.get('traceback', None))

        self._workers[id] = worker

        # create a (custom) process endpoint
        #
        ep = WorkerProcessEndpoint(self._node._reactor, exe, args, path=workdir, env=worker_env, worker=worker)

        # ready handling
        #
        def on_ready_success(proto):

            worker.pid = proto.transport.pid
            worker.status = 'started'
            worker.started = datetime.utcnow()

            self.log.info("{worker} with ID '{id}' and PID {pid} started",
                          worker=worker_logname, id=worker.id, pid=worker.pid)

            self._node._reactor.addSystemEventTrigger(
                'before', 'shutdown',
                self._cleanup_worker, self._node._reactor, worker,
            )

            # directory watcher
            #
            if 'watch' in options:

                if HAS_FS_WATCHER:

                    # assemble list of watched directories
                    watched_dirs = []
                    for d in options['watch'].get('directories', []):
                        watched_dirs.append(os.path.abspath(os.path.join(self._node._cbdir, d)))

                    worker.watch_timeout = options['watch'].get('timeout', 1)

                    # create a filesystem watcher
                    worker.watcher = FilesystemWatcher(workdir, watched_dirs=watched_dirs)

                    # make sure to stop the watch upon Twisted being shut down
                    def on_shutdown():
                        worker.watcher.stop()

                    self._node._reactor.addSystemEventTrigger('before', 'shutdown', on_shutdown)

                    # this handler will get fired by the watcher upon detecting an FS event
                    def on_filesystem_change(fs_event):
                        worker.watcher.stop()
                        proto.signal('TERM')

                        if options['watch'].get('action', None) == 'restart':
                            self.log.info("Filesystem watcher detected change {fs_event} - restarting guest in {watch_timeout} seconds ..", fs_event=fs_event, watch_timeout=worker.watch_timeout)
                            # Add a timeout large enough (perhaps add a config option later)
                            self._node._reactor.callLater(worker.watch_timeout, self.start_guest, id, config, details)
                            # Shut the worker down, after the restart event is scheduled
                            # FIXME: all workers should have a stop() method ..
                            # -> 'GuestWorkerProcess' object has no attribute 'stop'
                            # worker.stop()
                        else:
                            self.log.info("Filesystem watcher detected change {fs_event} - no action taken!", fs_event=fs_event)

                    # now start watching ..
                    worker.watcher.start(on_filesystem_change)

                else:
                    self.log.warn("Cannot watch directories for changes - feature not available")

            # assemble guest worker startup information
            #
            started_info = {
                u'id': worker.id,
                u'status': worker.status,
                u'started': utcstr(worker.started),
                u'who': worker.who,
            }

            self.publish(started_topic, started_info, options=PublishOptions(exclude=details.caller))

            return started_info

        def on_ready_error(err):
            del self._workers[worker.id]

            emsg = 'Failed to start guest worker: {}'.format(err.value)
            self.log.error(emsg)
            raise ApplicationError(u"crossbar.error.cannot_start", emsg, ep.getlog())

        worker.ready.addCallbacks(on_ready_success, on_ready_error)

        def on_exit_success(res):
            self.log.info("Guest {id} exited with success", id=worker.id)
            del self._workers[worker.id]

        def on_exit_error(err):
            self.log.error("Guest {id} exited with error {err.value}",
                           id=worker.id, err=err)
            del self._workers[worker.id]

        worker.exit.addCallbacks(on_exit_success, on_exit_error)

        # create a transport factory for talking WAMP to the native worker
        #
        transport_factory = create_guest_worker_client_factory(config, worker.ready, worker.exit)
        transport_factory.noisy = False
        self._workers[id].factory = transport_factory

        # now (immediately before actually forking) signal the starting of the worker
        #
        starting_info = {
            u'id': id,
            u'status': worker.status,
            u'created': utcstr(worker.created),
            u'who': worker.who,
        }

        # the caller gets a progressive result ..
        if details.progress:
            details.progress(starting_info)

        # .. while all others get an event
        self.publish(starting_topic, starting_info, options=PublishOptions(exclude=details.caller))

        # now actually fork the worker ..
        #
        self.log.info("Starting {worker} with ID '{id}'...",
                      worker=worker_logname, id=id)
        self.log.debug("{worker} '{id}' using command line '{cli}'...",
                       worker=worker_logname, id=id, cli=' '.join(args))

        d = ep.connect(transport_factory)

        def on_connect_success(proto):

            # this seems to be called immediately when the child process
            # has been forked. even if it then immediately fails because
            # e.g. the executable doesn't even exist. in other words,
            # I'm not sure under what conditions the deferred will
            # errback - probably only if the forking of a new process fails
            # at OS level due to out of memory conditions or such.

            pid = proto.transport.pid
            self.log.debug("Guest worker process connected with PID {pid}",
                           pid=pid)

            worker.pid = pid

            # proto is an instance of GuestWorkerClientProtocol
            worker.proto = proto

            worker.status = 'connected'
            worker.connected = datetime.utcnow()

        def on_connect_error(err):

            # not sure when this errback is triggered at all .. see above.
            self.log.failure(
                "Internal error: connection to forked guest worker failed ({log_failure.value})",
            )

            # in any case, forward the error ..
            worker.ready.errback(err)

        d.addCallbacks(on_connect_success, on_connect_error)

        return worker.ready
コード例 #10
0
ファイル: process.py プロジェクト: NinjaMSP/crossbar
    def _start_native_worker(self, worker_type, worker_id, worker_options=None, details=None):

        # prohibit starting a worker twice
        #
        if worker_id in self._workers:
            emsg = "Could not start worker: a worker with ID '{}' is already running (or starting)".format(worker_id)
            self.log.error(emsg)
            raise ApplicationError(u'crossbar.error.worker_already_running', emsg)

        # check worker options
        #
        options = worker_options or {}
        try:
            if worker_type in self._node._native_workers:
                if self._node._native_workers[worker_type]['checkconfig_options']:
                    self._node._native_workers[worker_type]['checkconfig_options'](options)
                else:
                    raise Exception('No checkconfig_options for worker type "{worker_type}" implemented!'.format(worker_type=worker_type))
            else:
                raise Exception('invalid worker type "{}"'.format(worker_type))
        except Exception as e:
            emsg = "Could not start native worker: invalid configuration ({})".format(e)
            self.log.error(emsg)
            raise ApplicationError(u'crossbar.error.invalid_configuration', emsg)

        # the fully qualified worker class as a string
        worker_class = qual(self._node._native_workers[worker_type]['worker_class'])

        # allow override Python executable from options
        #
        if 'python' in options:
            exe = options['python']

            # the executable must be an absolute path, e.g. /home/oberstet/pypy-2.2.1-linux64/bin/pypy
            #
            if not os.path.isabs(exe):
                emsg = "Invalid worker configuration: python executable '{}' must be an absolute path".format(exe)
                self.log.error(emsg)
                raise ApplicationError(u'crossbar.error.invalid_configuration', emsg)

            # of course the path must exist and actually be executable
            #
            if not (os.path.isfile(exe) and os.access(exe, os.X_OK)):
                emsg = "Invalid worker configuration: python executable '{}' does not exist or isn't an executable".format(exe)
                self.log.error(emsg)
                raise ApplicationError(u'crossbar.error.invalid_configuration', emsg)
        else:
            exe = sys.executable

        # allow override default Python module search paths from options
        #
        if 'pythonpath' in options:
            pythonpaths_to_add = [os.path.abspath(os.path.join(self._node._cbdir, p)) for p in options.get('pythonpath', [])]
        else:
            pythonpaths_to_add = []

        # assemble command line for forking the worker
        #
        # all native workers (routers and containers for now) start
        # from the same script in crossbar/worker/process.py -- we're
        # invoking via "-m" so that .pyc files, __pycache__ etc work
        # properly.
        #
        args = [exe, "-u", "-m", "crossbar.worker.process"]
        args.extend(["--cbdir", self._node._cbdir])
        args.extend(["--node", str(self._node._node_id)])
        args.extend(["--worker", str(worker_id)])
        args.extend(["--realm", self._realm])
        args.extend(["--klass", worker_class])
        args.extend(["--loglevel", get_global_log_level()])
        if "shutdown" in options:
            args.extend(["--shutdown", options["shutdown"]])

        # Node-level callback to inject worker arguments
        #
        self._node._extend_worker_args(args, options)

        # allow override worker process title from options
        #
        if options.get('title', None):
            args.extend(['--title', options['title']])

        # forward explicit reactor selection
        #
        if 'reactor' in options and sys.platform in options['reactor']:
            args.extend(['--reactor', options['reactor'][sys.platform]])
        # FIXME
        # elif self._node.options.reactor:
        #    args.extend(['--reactor', self._node.options.reactor])

        # create worker process environment
        #
        worker_env = create_process_env(options)

        # We need to use the same PYTHONPATH we were started with, so we can
        # find the Crossbar we're working with -- it may not be the same as the
        # one on the default path
        worker_env["PYTHONPATH"] = os.pathsep.join(pythonpaths_to_add + sys.path)

        # log name of worker
        #
        worker_logname = self._node._native_workers[worker_type]['logname']

        # each worker is run under its own dedicated WAMP auth role
        #
        worker_auth_role = u'crossbar.worker.{}'.format(worker_id)

        # topic URIs used (later)
        #
        starting_topic = self._node._native_workers[worker_type]['topics']['starting']
        started_topic = self._node._native_workers[worker_type]['topics']['started']

        # add worker tracking instance to the worker map ..
        #
        WORKER = self._node._native_workers[worker_type]['class']
        worker = WORKER(self, worker_id, details.caller, keeplog=options.get('traceback', None))
        self._workers[worker_id] = worker

        # create a (custom) process endpoint.
        #
        if platform.isWindows():
            childFDs = None  # Use the default Twisted ones
        else:
            # The communication between controller and container workers is
            # using WAMP running over 2 pipes.
            # For controller->container traffic this runs over FD 0 (`stdin`)
            # and for the container->controller traffic, this runs over FD 3.
            #
            # Note: We use FD 3, not FD 1 (`stdout`) or FD 2 (`stderr`) for
            # container->controller traffic, so that components running in the
            # container which happen to write to `stdout` or `stderr` do not
            # interfere with the container-controller communication.
            childFDs = {0: "w", 1: "r", 2: "r", 3: "r"}

        ep = WorkerProcessEndpoint(
            self._node._reactor, exe, args, env=worker_env, worker=worker,
            childFDs=childFDs)

        # ready handling
        #
        def on_ready_success(worker_id):
            self.log.info('{worker_type} worker "{worker_id}" process {pid} started',
                          worker_type=worker_logname, worker_id=worker.id, pid=worker.pid)

            self._node._reactor.addSystemEventTrigger(
                'before', 'shutdown',
                self._cleanup_worker, self._node._reactor, worker,
            )

            worker.on_worker_started()

            started_info = {
                u'id': worker.id,
                u'status': worker.status,
                u'started': utcstr(worker.started),
                u'who': worker.who,
            }

            # FIXME: make start of stats printer dependent on log level ..
            if False:
                worker.log_stats(5.)

            self.publish(started_topic, started_info, options=PublishOptions(exclude=details.caller))

            return started_info

        def on_ready_error(err):
            del self._workers[worker.id]
            emsg = 'Failed to start native worker: {}'.format(err.value)
            self.log.error(emsg)
            raise ApplicationError(u"crossbar.error.cannot_start", emsg, worker.getlog())

        worker.ready.addCallbacks(on_ready_success, on_ready_error)

        def on_exit_success(_):
            self.log.info("Node worker {worker.id} ended successfully", worker=worker)

            # clear worker log
            worker.log_stats(0)

            # remove the dedicated node router authrole we dynamically
            # added for the worker
            self._node._drop_worker_role(worker_auth_role)

            # remove our metadata tracking for the worker
            del self._workers[worker.id]

            # indicate that the worker excited successfully
            return True

        def on_exit_error(err):
            self.log.info("Node worker {worker.id} ended with error ({err})", worker=worker, err=err)

            # clear worker log
            worker.log_stats(0)

            # remove the dedicated node router authrole we dynamically
            # added for the worker
            self._node._drop_worker_role(worker_auth_role)

            # remove our metadata tracking for the worker
            del self._workers[worker.id]

            # indicate that the worker excited with error
            return False

        def check_for_shutdown(was_successful):
            self.log.info('Checking for node shutdown: worker_exit_success={worker_exit_success}, shutdown_requested={shutdown_requested}, node_shutdown_triggers={node_shutdown_triggers}', worker_exit_success=was_successful, shutdown_requested=self._shutdown_requested, node_shutdown_triggers=self._node._node_shutdown_triggers)

            shutdown = self._shutdown_requested

            # automatically shutdown node whenever a worker ended (successfully, or with error)
            #
            if checkconfig.NODE_SHUTDOWN_ON_WORKER_EXIT in self._node._node_shutdown_triggers:
                self.log.info("Node worker ended, and trigger '{trigger}' active", trigger=checkconfig.NODE_SHUTDOWN_ON_WORKER_EXIT)
                shutdown = True

            # automatically shutdown node when worker ended with error
            #
            if not was_successful and checkconfig.NODE_SHUTDOWN_ON_WORKER_EXIT_WITH_ERROR in self._node._node_shutdown_triggers:
                self.log.info("Node worker ended with error, and trigger '{trigger}' active", trigger=checkconfig.NODE_SHUTDOWN_ON_WORKER_EXIT_WITH_ERROR)
                shutdown = True

            # automatically shutdown node when no more workers are left
            #
            if len(self._workers) == 0 and checkconfig.NODE_SHUTDOWN_ON_LAST_WORKER_EXIT in self._node._node_shutdown_triggers:
                self.log.info("No more node workers running, and trigger '{trigger}' active", trigger=checkconfig.NODE_SHUTDOWN_ON_LAST_WORKER_EXIT)
                shutdown = True

            # initiate shutdown (but only if we are not already shutting down)
            #
            if shutdown:
                self.shutdown()
            else:
                self.log.info('Node will continue to run!')

        d_on_exit = worker.exit.addCallbacks(on_exit_success, on_exit_error)
        d_on_exit.addBoth(check_for_shutdown)

        # create a transport factory for talking WAMP to the native worker
        #
        transport_factory = create_native_worker_client_factory(self._node._router_session_factory, worker_auth_role, worker.ready, worker.exit)
        transport_factory.noisy = False
        self._workers[worker_id].factory = transport_factory

        # now (immediately before actually forking) signal the starting of the worker
        #
        starting_info = {
            u'id': worker_id,
            u'status': worker.status,
            u'created': utcstr(worker.created),
            u'who': worker.who,
        }

        # the caller gets a progressive result ..
        if details.progress:
            details.progress(starting_info)

        # .. while all others get an event
        self.publish(starting_topic, starting_info, options=PublishOptions(exclude=details.caller))

        # now actually fork the worker ..
        #
        self.log.info('{worker_logname} worker "{worker_id}" starting ..',
                      worker_logname=worker_logname, worker_id=worker_id)
        self.log.debug('{worker_logname} "{worker_id}" command line is "{cmdline}"',
                       worker_logname=worker_logname, worker_id=worker_id, cmdline=' '.join(args))

        d = ep.connect(transport_factory)

        def on_connect_success(proto):

            # this seems to be called immediately when the child process
            # has been forked. even if it then immediately fails because
            # e.g. the executable doesn't even exist. in other words,
            # I'm not sure under what conditions the deferred will errback ..

            self.log.debug('Native worker "{worker_id}" connected',
                           worker_id=worker_id)

            worker.on_worker_connected(proto)

            # dynamically add a dedicated authrole to the router
            # for the worker we've just started
            self._node._add_worker_role(worker_auth_role, options)

        def on_connect_error(err):

            # not sure when this errback is triggered at all ..
            self.log.error("Interal error: connection to forked native worker failed ({err})", err=err)

            # in any case, forward the error ..
            worker.ready.errback(err)

        d.addCallbacks(on_connect_success, on_connect_error)

        return worker.ready
コード例 #11
0
ファイル: process.py プロジェクト: Paranaix/crossbar
    def start_guest(self, id, config, details=None):
        """
        Start a new guest process on this node.

        :param config: The guest process configuration.
        :type config: obj

        :returns: int -- The PID of the new process.
        """
        # prohibit starting a worker twice
        #
        if id in self._workers:
            emsg = "Could not start worker: a worker with ID '{}' is already running (or starting)".format(id)
            self.log.error(emsg)
            raise ApplicationError(u"crossbar.error.worker_already_running", emsg)

        try:
            checkconfig.check_guest(config)
        except Exception as e:
            raise ApplicationError(
                u"crossbar.error.invalid_configuration", "invalid guest worker configuration: {}".format(e)
            )

        options = config.get("options", {})

        # guest process working directory
        #
        workdir = self._node._cbdir
        if "workdir" in options:
            workdir = os.path.join(workdir, options["workdir"])
        workdir = os.path.abspath(workdir)

        # guest process executable and command line arguments
        #

        # first try to configure the fully qualified path for the guest
        # executable by joining workdir and configured exectuable ..
        exe = os.path.abspath(os.path.join(workdir, config["executable"]))

        if check_executable(exe):
            self.log.info("Using guest worker executable '{exe}' (executable path taken from configuration)", exe=exe)
        else:
            # try to detect the fully qualified path for the guest
            # executable by doing a "which" on the configured executable name
            exe = shutil.which(config["executable"])
            if exe is not None and check_executable(exe):
                self.log.info(
                    "Using guest worker executable '{exe}' (executable path detected from environment)", exe=exe
                )
            else:
                emsg = "Could not start worker: could not find and executable for '{}'".format(config["executable"])
                self.log.error(emsg)
                raise ApplicationError(u"crossbar.error.invalid_configuration", emsg)

        # guest process command line arguments
        #
        args = [exe]
        args.extend(config.get("arguments", []))

        # guest process environment
        #
        worker_env = create_process_env(options)

        # log name of worker
        #
        worker_logname = "Guest"

        # topic URIs used (later)
        #
        starting_topic = "crossbar.node.{}.on_guest_starting".format(self._node_id)
        started_topic = "crossbar.node.{}.on_guest_started".format(self._node_id)

        # add worker tracking instance to the worker map ..
        #
        worker = GuestWorkerProcess(self, id, details.caller, keeplog=options.get("traceback", None))

        self._workers[id] = worker

        # create a (custom) process endpoint
        #
        ep = WorkerProcessEndpoint(self._node._reactor, exe, args, path=workdir, env=worker_env, worker=worker)

        # ready handling
        #
        def on_ready_success(proto):

            worker.pid = proto.transport.pid
            worker.status = "started"
            worker.started = datetime.utcnow()

            self.log.info(
                "{worker} with ID '{id}' and PID {pid} started", worker=worker_logname, id=worker.id, pid=worker.pid
            )

            self._node._reactor.addSystemEventTrigger(
                "before", "shutdown", self._cleanup_worker, self._node._reactor, worker
            )

            # directory watcher
            #
            if "watch" in options:

                if HAS_FSNOTIFY:

                    # assemble list of watched directories
                    watched_dirs = []
                    for d in options["watch"].get("directories", []):
                        watched_dirs.append(os.path.abspath(os.path.join(self._node._cbdir, d)))

                    worker.watch_timeout = options["watch"].get("timeout", 1)

                    # create a directory watcher
                    worker.watcher = DirWatcher(dirs=watched_dirs, notify_once=True)

                    # make sure to stop the background thread running inside the
                    # watcher upon Twisted being shut down
                    def on_shutdown():
                        worker.watcher.stop()

                    self._node._reactor.addSystemEventTrigger("before", "shutdown", on_shutdown)

                    # this handler will get fired by the watcher upon detecting an FS event
                    def on_fsevent(evt):
                        worker.watcher.stop()
                        proto.signal("TERM")

                        if options["watch"].get("action", None) == "restart":
                            self.log.info("Restarting guest ..")
                            # Add a timeout large enough (perhaps add a config option later)
                            self._node._reactor.callLater(worker.watch_timeout, self.start_guest, id, config, details)
                            # Shut the worker down, after the restart event is scheduled
                            worker.stop()

                    # now run the watcher on a background thread
                    deferToThread(worker.watcher.loop, on_fsevent)

                else:
                    self.log.warn("Warning: cannot watch directory for changes - feature DirWatcher unavailable")

            # assemble guest worker startup information
            #
            started_info = {
                u"id": worker.id,
                u"status": worker.status,
                u"started": utcstr(worker.started),
                u"who": worker.who,
            }

            self.publish(started_topic, started_info, options=PublishOptions(exclude=details.caller))

            return started_info

        def on_ready_error(err):
            del self._workers[worker.id]

            emsg = "Failed to start guest worker: {}".format(err.value)
            self.log.error(emsg)
            raise ApplicationError(u"crossbar.error.cannot_start", emsg, ep.getlog())

        worker.ready.addCallbacks(on_ready_success, on_ready_error)

        def on_exit_success(res):
            self.log.info("Guest {id} exited with success", id=worker.id)
            del self._workers[worker.id]

        def on_exit_error(err):
            self.log.error("Guest {id} exited with error {err.value}", id=worker.id, err=err)
            del self._workers[worker.id]

        worker.exit.addCallbacks(on_exit_success, on_exit_error)

        # create a transport factory for talking WAMP to the native worker
        #
        transport_factory = create_guest_worker_client_factory(config, worker.ready, worker.exit)
        transport_factory.noisy = False
        self._workers[id].factory = transport_factory

        # now (immediately before actually forking) signal the starting of the worker
        #
        starting_info = {u"id": id, u"status": worker.status, u"created": utcstr(worker.created), u"who": worker.who}

        # the caller gets a progressive result ..
        if details.progress:
            details.progress(starting_info)

        # .. while all others get an event
        self.publish(starting_topic, starting_info, options=PublishOptions(exclude=details.caller))

        # now actually fork the worker ..
        #
        self.log.info("Starting {worker} with ID '{id}'...", worker=worker_logname, id=id)
        self.log.debug(
            "{worker} '{id}' using command line '{cli}'...", worker=worker_logname, id=id, cli=" ".join(args)
        )

        d = ep.connect(transport_factory)

        def on_connect_success(proto):

            # this seems to be called immediately when the child process
            # has been forked. even if it then immediately fails because
            # e.g. the executable doesn't even exist. in other words,
            # I'm not sure under what conditions the deferred will
            # errback - probably only if the forking of a new process fails
            # at OS level due to out of memory conditions or such.

            pid = proto.transport.pid
            self.log.debug("Guest worker process connected with PID {pid}", pid=pid)

            worker.pid = pid

            # proto is an instance of GuestWorkerClientProtocol
            worker.proto = proto

            worker.status = "connected"
            worker.connected = datetime.utcnow()

        def on_connect_error(err):

            # not sure when this errback is triggered at all .. see above.
            self.log.error("Internal error: connection to forked guest worker failed ({})".format(err))

            # in any case, forward the error ..
            worker.ready.errback(err)

        d.addCallbacks(on_connect_success, on_connect_error)

        return worker.ready