Beispiel #1
0
 def run_container_start_checks(self, started_at, timeout_at):
     """
     Run startup checks.
     """
     checks_start_time = time.time()
     while time.time() <= timeout_at:
         if not self.is_running():
             raise FactoryNotStarted("{} is no longer running".format(self))
         if self._container_start_checks():
             break
     else:
         log.error(
             "Failed to run container start checks after %1.2f seconds",
             time.time() - checks_start_time,
         )
         return False
     check_ports = set(self.get_check_ports())
     if not check_ports:
         return True
     while time.time() <= timeout_at:
         if not self.is_running():
             raise FactoryNotStarted("{} is no longer running".format(self))
         if not check_ports:
             break
         check_ports -= ports.get_connectable_ports(check_ports)
         if check_ports:
             time.sleep(0.5)
     else:
         log.error("Failed to check ports after %1.2f seconds", time.time() - checks_start_time)
         return False
     return True
Beispiel #2
0
    def wait_for_events(self, patterns, timeout=30, after_time=None):
        """
        Wait for a set of patterns to match or until timeout is reached.

        :param ~collections.abc.Sequence pattern:
            An iterable of tuples in the form of ``("<daemon-id>", "<event-tag-pattern>")``, ie, which daemon ID
            we're targeting and the event tag pattern which will be passed to :py:func:`~fnmatch.fnmatch` to
            assert a match.
        :keyword int,float timeout:
            The amount of time to wait for the events, in seconds.
        :keyword ~datetime.datetime,float after_time:
            After which time to start matching events.

        :return:
            An instance of :py:class:`~saltfactories.plugins.event_listener.MatchedEvents`.
        :rtype ~saltfactories.plugins.event_listener.MatchedEvents:
        """
        if after_time is None:
            after_time = datetime.utcnow()
        elif isinstance(after_time, float):
            after_time = datetime.utcfromtimestamp(after_time)
        after_time_iso = after_time.isoformat()
        log.debug(
            "%s is waiting for event patterns happening after %s: %s",
            self,
            after_time_iso,
            set(patterns),
        )
        found_events = set()
        patterns = set(patterns)
        timeout_at = time.time() + timeout
        while True:
            if not patterns:
                return True
            for event in copy.copy(self.store):
                if event.expired:
                    # Too old, carry on
                    continue
                if event.stamp < after_time:
                    continue
                for pattern in set(patterns):
                    _daemon_id, _pattern = pattern
                    if event.daemon_id != _daemon_id:
                        continue
                    if fnmatch.fnmatch(event.tag, _pattern):
                        log.debug("%s Found matching pattern: %s", self,
                                  pattern)
                        found_events.add(event)
                        patterns.remove((event.daemon_id, _pattern))
            if not patterns:
                break
            if time.time() > timeout_at:
                break
            time.sleep(0.5)
        return MatchedEvents(matches=found_events, missed=patterns)
Beispiel #3
0
    def _cleanup(self):
        cleanup_at = time.time() + 30
        while self.running_event.is_set():
            if time.time() < cleanup_at:
                time.sleep(1)
                continue

            # Reset cleanup time
            cleanup_at = time.time() + 30

            # Cleanup expired events
            to_remove = []
            for event in self.store:
                if event.expired:
                    to_remove.append(event)

            for event in to_remove:
                log.debug("%s Removing from event store: %s", self, event)
                self.store.remove(event)
            log.debug("%s store size after cleanup: %s", self, len(self.store))
    def run_start_checks(self, started_at, timeout_at):
        """
        Run checks to confirm that the daemon has started.
        """
        if not super().run_start_checks(started_at, timeout_at):
            return False
        if not self.event_listener:  # pragma: no cover
            log.debug(
                "The 'event_listener' attribute is not set. Not checking events..."
            )
            return True

        check_events = set(self.get_check_events())
        if not check_events:
            log.debug("No events to listen to for %s", self)
            return True
        log.debug("Events to check for %s: %s", self,
                  set(self.get_check_events()))
        checks_start_time = time.time()
        while time.time() <= timeout_at:
            if not self.is_running():
                raise FactoryNotStarted("{} is no longer running".format(self))
            if not check_events:
                break
            check_events -= {(event.daemon_id, event.tag)
                             for event in self.event_listener.get_events(
                                 check_events, after_time=started_at)}
            if check_events:
                time.sleep(1.5)
        else:
            log.error(
                "Failed to check events after %1.2f seconds for %s. Remaining events to check: %s",
                time.time() - checks_start_time,
                self,
                check_events,
            )
            return False
        log.debug("All events checked for %s: %s", self,
                  set(self.get_check_events()))
        return True
Beispiel #5
0
    def start(self, *command, max_start_attempts=None, start_timeout=None):
        """
        Start the container.
        """
        if self.is_running():
            log.warning("%s is already running.", self)
            return True
        self._terminate_result = None
        atexit.register(self.terminate)
        factory_started = False
        for callback, args, kwargs in self._before_start_callbacks:
            try:
                callback(*args, **kwargs)
            except Exception as exc:  # pragma: no cover pylint: disable=broad-except
                log.info(
                    "Exception raised when running %s: %s",
                    format_callback_to_string(callback, args, kwargs),
                    exc,
                    exc_info=True,
                )

        start_time = time.time()
        start_attempts = max_start_attempts or self.max_start_attempts
        current_attempt = 0
        while current_attempt <= start_attempts:
            current_attempt += 1
            if factory_started:
                break
            log.info("Starting %s. Attempt: %d of %d", self, current_attempt, start_attempts)
            current_start_time = time.time()
            start_running_timeout = current_start_time + (start_timeout or self.start_timeout)

            # Start the container
            self.container = self.docker_client.containers.run(
                self.image,
                name=self.name,
                detach=True,
                stdin_open=True,
                command=list(command) or None,
                **self.container_run_kwargs,
            )
            while time.time() <= start_running_timeout:
                # Don't know why, but if self.container wasn't previously in a running
                # state, and now it is, we have to re-set the self.container attribute
                # so that it gives valid status information
                self.container = self.docker_client.containers.get(self.name)
                if self.container.status != "running":
                    time.sleep(0.25)
                    continue

                self.container = self.docker_client.containers.get(self.name)
                logs = self.container.logs(stdout=True, stderr=True, stream=False)
                if isinstance(logs, bytes):
                    stdout = logs.decode()
                    stderr = None
                else:
                    stdout = logs[0].decode()
                    stderr = logs[1].decode()
                if stdout and stderr:
                    log.info("Running Container Logs:\n%s\n%s", stdout, stderr)
                elif stdout:
                    log.info("Running Container Logs:\n%s", stdout)

                # If we reached this far it means that we got the running status above, and
                # now that the container has started, run start checks
                try:
                    if (
                        self.run_container_start_checks(current_start_time, start_running_timeout)
                        is False
                    ):
                        time.sleep(0.5)
                        continue
                except FactoryNotStarted:
                    self.terminate()
                    break
                log.info(
                    "The %s factory is running after %d attempts. Took %1.2f seconds",
                    self,
                    current_attempt,
                    time.time() - start_time,
                )
                factory_started = True
                break
            else:
                # We reached start_running_timeout, re-try
                try:
                    self.container.remove(force=True)
                    self.container.wait()
                except APIError:
                    pass
                self.container = None
        else:
            # The factory failed to confirm it's running status
            self.terminate()
        if factory_started:
            for callback, args, kwargs in self._after_start_callbacks:
                try:
                    callback(*args, **kwargs)
                except Exception as exc:  # pragma: no cover pylint: disable=broad-except
                    log.info(
                        "Exception raised when running %s: %s",
                        format_callback_to_string(callback, args, kwargs),
                        exc,
                        exc_info=True,
                    )
            # TODO: Add containers to the processes stats?!
            # if self.factories_manager and self.factories_manager.stats_processes is not None:
            #    self.factories_manager.stats_processes[self.get_display_name()] = psutil.Process(
            #        self.pid
            #    )
            return factory_started
        result = self.terminate()
        raise FactoryNotStarted(
            "The {} factory has failed to confirm running status after {} attempts, which "
            "took {:.2f} seconds({:.2f} seconds each)".format(
                self,
                current_attempt - 1,
                time.time() - start_time,
                start_timeout or self.start_timeout,
            ),
            process_result=result,
        )
Beispiel #6
0
    def process_logs(self):
        """
        Process the logs returned.
        """
        address = "tcp://{}:{}".format(self.log_host, self.log_port)
        context = zmq.Context()
        puller = context.socket(zmq.PULL)
        puller.set_hwm(self.socket_hwm)
        exit_timeout_seconds = 5
        exit_timeout = None
        try:
            puller.bind(address)
        except zmq.ZMQError:  # pragma: no cover
            log.exception("%s Unable to bind to puller at %s", self, address)
            return
        try:
            self.running_event.set()
            poller = zmq.Poller()
            poller.register(puller, zmq.POLLIN)
            while True:
                if not self.running_event.is_set():
                    if exit_timeout is None:
                        log.debug(
                            "%s Waiting %d seconds to process any remaning log messages "
                            "before exiting...",
                            self,
                            exit_timeout_seconds,
                        )
                        exit_timeout = time.time() + exit_timeout_seconds

                    if time.time() >= exit_timeout:
                        log.debug(
                            "%s Unable to process remaining log messages in time. Exiting anyway.",
                            self,
                        )
                        break
                try:
                    if not poller.poll(1000):
                        continue
                    msg = puller.recv()
                    if msgpack.version >= (0, 5, 2):
                        record_dict = msgpack.loads(msg, raw=False)
                    else:  # pragma: no cover
                        record_dict = msgpack.loads(msg, encoding="utf-8")
                    if record_dict is None:
                        # A sentinel to stop processing the queue
                        log.info("%s Received the sentinel to shutdown", self)
                        self.sentinel_event.set()
                        break
                    try:
                        record_dict["message"]
                    except KeyError:  # pragma: no cover
                        # This log record was msgpack dumped from Py2
                        for key, value in record_dict.copy().items():
                            skip_update = True
                            if isinstance(value, bytes):
                                value = value.decode("utf-8")
                                skip_update = False
                            if isinstance(key, bytes):
                                key = key.decode("utf-8")
                                skip_update = False
                            if skip_update is False:
                                record_dict[key] = value
                    # Just log everything, filtering will happen on the main process
                    # logging handlers
                    record = logging.makeLogRecord(record_dict)
                    logger = logging.getLogger(record.name)
                    logger.handle(record)
                except (EOFError, KeyboardInterrupt,
                        SystemExit):  # pragma: no cover
                    break
                except Exception as exc:  # pragma: no cover pylint: disable=broad-except
                    log.warning(
                        "%s An exception occurred in the processing queue thread: %s",
                        self,
                        exc,
                        exc_info=True,
                    )
        finally:
            puller.close(1)
            context.term()
        log.debug("%s Process log thread terminated", self)