예제 #1
0
파일: lava-master.py 프로젝트: slawr/lava
    def main_loop(self, options):
        last_schedule = last_dispatcher_check = time.time()
        encryption_settings = get_encryption_settings(options)

        while True:
            try:
                try:
                    # Compute the timeout
                    now = time.time()
                    timeout = min(
                        SCHEDULE_INTERVAL - (now - last_schedule),
                        PING_INTERVAL - (now - last_dispatcher_check),
                    )
                    # If some actions are remaining, decrease the timeout
                    if any([self.events[k] for k in self.events.keys()]):
                        timeout = min(timeout, 2)
                    # Wait at least for 1ms
                    timeout = max(timeout * 1000, 1)

                    # Wait for data or a timeout
                    sockets = dict(self.poller.poll(timeout))
                except zmq.error.ZMQError:
                    continue

                if sockets.get(self.pipe_r) == zmq.POLLIN:
                    self.logger.info("[POLL] Received a signal, leaving")
                    break

                # Command socket
                if sockets.get(self.controler) == zmq.POLLIN:
                    while self.controler_socket(
                    ):  # Unqueue all pending messages
                        pass

                # Events socket
                if sockets.get(self.event_socket) == zmq.POLLIN:
                    self.logger.info("[EVENT] handling events")
                    while self.read_event_socket(
                    ):  # Unqueue all pending messages
                        pass
                    # Wait for the next iteration to handle the event.
                    # In fact, the code that generated the event (lava-logs or
                    # lava-server-gunicorn) needs some time to commit the
                    # database transaction.
                    # If we are too fast, the database object won't be
                    # available (or in the right state) yet.
                    continue

                # Inotify socket
                if sockets.get(self.inotify_fd) == zmq.POLLIN:
                    os.read(self.inotify_fd, 4096)
                    if self.auth is not None:
                        self.logger.info(
                            "[AUTH] Reloading certificates from %s",
                            encryption_settings["slaves_certs"],
                        )
                        self.auth.configure_curve(
                            domain="*",
                            location=encryption_settings["slaves_certs"])
                    else:
                        self.logger.error(
                            "[AUTH] New certificates in %s but encryption is disabled",
                            encryption_settings["slaves_certs"],
                        )

                # Check dispatchers status
                now = time.time()
                if now - last_dispatcher_check > PING_INTERVAL:
                    for hostname, dispatcher in self.dispatchers.items():
                        if (dispatcher.online and now - dispatcher.last_msg >
                                DISPATCHER_TIMEOUT):
                            if hostname == "lava-logs":
                                self.logger.error(
                                    "[STATE] lava-logs goes OFFLINE")
                            else:
                                self.logger.error(
                                    "[STATE] Dispatcher <%s> goes OFFLINE",
                                    hostname)
                            self.dispatchers[hostname].go_offline()
                    last_dispatcher_check = now

                # Limit accesses to the database. This will also limit the rate of
                # CANCEL and START messages
                if time.time() - last_schedule > SCHEDULE_INTERVAL:
                    if self.dispatchers["lava-logs"].online:
                        schedule(self.logger)

                        # Dispatch scheduled jobs
                        with transaction.atomic():
                            self.start_jobs()
                    else:
                        self.logger.warning(
                            "lava-logs is offline: can't schedule jobs")

                    # Handle canceling jobs
                    with transaction.atomic():
                        self.cancel_jobs()

                    # Do not count the time taken to schedule jobs
                    last_schedule = time.time()
                else:
                    # Cancel the jobs and remove the jobs from the set
                    if self.events["canceling"]:
                        with transaction.atomic():
                            self.cancel_jobs(partial=True)
                        self.events["canceling"] = set()
                    # Schedule for available device-types
                    if self.events["available_dt"]:
                        jobs = schedule(self.logger,
                                        self.events["available_dt"])
                        self.events["available_dt"] = set()
                        # Dispatch scheduled jobs
                        with transaction.atomic():
                            self.start_jobs(jobs)

            except (OperationalError, InterfaceError):
                self.logger.info("[RESET] database connection reset.")
                # Closing the database connection will force Django to reopen
                # the connection
                connection.close()
                time.sleep(2)
예제 #2
0
    def handle(self, *args, **options):
        # Initialize logging.
        self.setup_logging("lava-logs", options["level"], options["log_file"],
                           FORMAT)

        self.logger.info("[INIT] Starting lava-logs")
        self.logger.info("[INIT] Version %s", __version__)

        self.logger.info("[INIT] Dropping privileges")
        if not self.drop_privileges(options["user"], options["group"]):
            self.logger.error("[INIT] Unable to drop privileges")
            return

        filename = os.path.join(settings.MEDIA_ROOT, "lava-logs-config.yaml")
        self.logger.debug("[INIT] Dumping config to %s", filename)
        with open(filename, "w") as output:
            yaml_dump(options, output)

        # Create the sockets
        context = zmq.Context()
        self.log_socket = context.socket(zmq.PULL)
        self.controler = context.socket(zmq.ROUTER)
        self.controler.setsockopt(zmq.IDENTITY, b"lava-logs")
        # Limit the number of messages in the queue
        self.controler.setsockopt(zmq.SNDHWM, 2)
        # From http://api.zeromq.org/4-2:zmq-setsockopt#toc5
        # "Immediately readies that connection for data transfer with the master"
        self.controler.setsockopt(zmq.CONNECT_RID, b"master")

        if options["ipv6"]:
            self.logger.info("[INIT] Enabling IPv6")
            self.log_socket.setsockopt(zmq.IPV6, 1)
            self.controler.setsockopt(zmq.IPV6, 1)

        encryption_settings = get_encryption_settings(options)
        if encryption_settings["encrypt"]:
            self.logger.info("[INIT] Starting encryption")
            try:
                self.auth = ThreadAuthenticator(context)
                self.auth.start()
                self.logger.debug(
                    "[INIT] Opening master certificate: %s",
                    encryption_settings["master_cert"],
                )
                master_public, master_secret = zmq.auth.load_certificate(
                    encryption_settings["master_cert"])
                self.logger.debug(
                    "[INIT] Using slaves certificates from: %s",
                    encryption_settings["slaves_certs"],
                )
                self.auth.configure_curve(
                    domain="*", location=encryption_settings["slaves_certs"])
            except OSError as err:
                self.logger.error("[INIT] %s", err)
                self.auth.stop()
                return
            self.log_socket.curve_publickey = master_public
            self.log_socket.curve_secretkey = master_secret
            self.log_socket.curve_server = True
            self.controler.curve_publickey = master_public
            self.controler.curve_secretkey = master_secret
            self.controler.curve_serverkey = master_public

        self.logger.debug("[INIT] Watching %s",
                          encryption_settings["slaves_certs"])
        self.cert_dir_path = encryption_settings["slaves_certs"]
        self.inotify_fd = watch_directory(encryption_settings["slaves_certs"])
        if self.inotify_fd is None:
            self.logger.error("[INIT] Unable to start inotify")

        self.log_socket.bind(options["socket"])
        self.controler.connect(options["master_socket"])

        # Poll on the sockets. This allow to have a
        # nice timeout along with polling.
        self.poller = zmq.Poller()
        self.poller.register(self.log_socket, zmq.POLLIN)
        self.poller.register(self.controler, zmq.POLLIN)
        if self.inotify_fd is not None:
            self.poller.register(os.fdopen(self.inotify_fd), zmq.POLLIN)

        # Translate signals into zmq messages
        (self.pipe_r, _) = self.setup_zmq_signal_handler()
        self.poller.register(self.pipe_r, zmq.POLLIN)

        self.logger.info("[INIT] listening for logs")
        # PING right now: the master is waiting for this message to start
        # scheduling.
        self.controler.send_multipart([b"master", b"PING"])

        try:
            self.main_loop()
        except BaseException as exc:
            self.logger.error("[EXIT] Unknown exception raised, leaving!")
            self.logger.exception(exc)

        # Close the controler socket
        self.controler.close(linger=0)
        self.poller.unregister(self.controler)

        # Carefully close the logging socket as we don't want to lose messages
        self.logger.info(
            "[EXIT] Disconnect logging socket and process messages")
        endpoint = u(self.log_socket.getsockopt(zmq.LAST_ENDPOINT))
        self.logger.debug("[EXIT] unbinding from '%s'", endpoint)
        self.log_socket.unbind(endpoint)

        # Empty the queue
        try:
            while self.wait_for_messages(True):
                # Flush test cases cache for every iteration because we might
                # get killed soon.
                self.flush_test_cases()
        except BaseException as exc:
            self.logger.error("[EXIT] Unknown exception raised, leaving!")
            self.logger.exception(exc)
        finally:
            # Last flush
            self.flush_test_cases()
            self.logger.info(
                "[EXIT] Closing the logging socket: the queue is empty")
            self.log_socket.close()
            if encryption_settings["encrypt"]:
                self.auth.stop()
            context.term()
예제 #3
0
파일: lava-master.py 프로젝트: slawr/lava
    def handle(self, *args, **options):
        # Initialize logging.
        self.setup_logging("lava-master", options["level"],
                           options["log_file"], FORMAT)

        self.logger.info("[INIT] Starting lava-master")
        self.logger.info("[INIT] Version %s", __version__)
        self.logger.info("[INIT] Using protocol version %d", PROTOCOL_VERSION)

        self.logger.info("[INIT] Dropping privileges")
        if not self.drop_privileges(options["user"], options["group"]):
            self.logger.error("[INIT] Unable to drop privileges")
            return

        filename = os.path.join(settings.MEDIA_ROOT, "lava-master-config.yaml")
        self.logger.debug("[INIT] Dumping config to %s", filename)
        with open(filename, "w") as output:
            yaml_dump(options, output)

        self.logger.info("[INIT] Marking all workers as offline")
        with transaction.atomic():
            for worker in Worker.objects.select_for_update().all():
                worker.go_state_offline()
                worker.save()

        # Create the sockets
        context = zmq.Context()
        self.controler = context.socket(zmq.ROUTER)
        self.event_socket = context.socket(zmq.SUB)

        if options["ipv6"]:
            self.logger.info("[INIT] Enabling IPv6")
            self.controler.setsockopt(zmq.IPV6, 1)
            self.event_socket.setsockopt(zmq.IPV6, 1)

        encryption_settings = get_encryption_settings(options)
        if encryption_settings["encrypt"]:
            self.logger.info("[INIT] Starting encryption")
            try:
                self.auth = ThreadAuthenticator(context)
                self.auth.start()
                self.logger.debug(
                    "[INIT] Opening master certificate: %s",
                    encryption_settings["master_cert"],
                )
                master_public, master_secret = zmq.auth.load_certificate(
                    encryption_settings["master_cert"])
                self.logger.debug(
                    "[INIT] Using slaves certificates from: %s",
                    encryption_settings["slaves_certs"],
                )
                self.auth.configure_curve(
                    domain="*", location=encryption_settings["slaves_certs"])
            except OSError as err:
                self.logger.error(err)
                self.auth.stop()
                return
            self.controler.curve_publickey = master_public
            self.controler.curve_secretkey = master_secret
            self.controler.curve_server = True

            self.logger.debug("[INIT] Watching %s",
                              encryption_settings["slaves_certs"])
            self.inotify_fd = watch_directory(
                encryption_settings["slaves_certs"])
            if self.inotify_fd is None:
                self.logger.error("[INIT] Unable to start inotify")

        self.controler.setsockopt(zmq.IDENTITY, b"master")
        # From http://api.zeromq.org/4-2:zmq-setsockopt#toc42
        # "If two clients use the same identity when connecting to a ROUTER
        # [...] the ROUTER socket shall hand-over the connection to the new
        # client and disconnect the existing one."
        self.controler.setsockopt(zmq.ROUTER_HANDOVER, 1)
        self.controler.bind(options["master_socket"])

        # Set the topic and connect
        self.event_socket.setsockopt(zmq.SUBSCRIBE, b(settings.EVENT_TOPIC))
        self.event_socket.connect(options["event_url"])

        # Poll on the sockets. This allow to have a
        # nice timeout along with polling.
        self.poller = zmq.Poller()
        self.poller.register(self.controler, zmq.POLLIN)
        self.poller.register(self.event_socket, zmq.POLLIN)
        if self.inotify_fd is not None:
            self.poller.register(os.fdopen(self.inotify_fd), zmq.POLLIN)

        # Translate signals into zmq messages
        (self.pipe_r, _) = self.setup_zmq_signal_handler()
        self.poller.register(self.pipe_r, zmq.POLLIN)

        # Send master upgrade notifications.
        send_upgraded_master_notifications(__version__, self.logger)

        # Main loop
        self.logger.info("[INIT] Starting main loop")
        try:
            self.main_loop(options)
        except BaseException as exc:
            self.logger.error("[CLOSE] Unknown exception raised, leaving!")
            self.logger.exception(exc)
        finally:
            # Drop controler socket: the protocol does handle lost messages
            self.logger.info(
                "[CLOSE] Closing the controler socket and dropping messages")
            self.controler.close(linger=0)
            self.event_socket.close(linger=0)
            if encryption_settings["encrypt"]:
                self.auth.stop()
            context.term()