def run_container_start_checks(self, started_at, timeout_at):
    """
    Wait for the container start checks to pass and for its ports to accept connections.

    Two polling phases share the same deadline. First
    ``self._container_start_checks()`` is called until it returns a truthy value,
    then the ports returned by ``self.get_check_ports()`` are polled with
    ``ports.get_connectable_ports`` until none are left pending.

    Args:
        started_at: Not used by this method.
        timeout_at(float): Deadline, compared against ``time.time()``.

    Raises:
        FactoryNotStarted: Raised when ``self.is_running()`` returns a false value
            while either phase is still polling.

    Returns:
        bool: ``True`` when the start checks passed and there are no ports left to
        connect to, ``False`` when the deadline expired first.
    """
    checks_start_time = time.time()

    # Phase 1: the container specific start checks.
    # NOTE(review): this loop does not sleep between polls; presumably
    # ``_container_start_checks`` blocks on its own - confirm.
    while time.time() <= timeout_at:
        if not self.is_running():
            raise FactoryNotStarted("{} is no longer running".format(self))
        if self._container_start_checks():
            break
    else:
        # The ``else`` of a ``while`` only runs when the deadline expired without ``break``
        log.error(
            "Failed to run container start checks after %1.2f seconds",
            time.time() - checks_start_time,
        )
        return False

    # Phase 2: the ports the container is expected to listen on.
    check_ports = set(self.get_check_ports())
    if not check_ports:
        return True

    while time.time() <= timeout_at:
        if not self.is_running():
            raise FactoryNotStarted("{} is no longer running".format(self))
        check_ports -= ports.get_connectable_ports(check_ports)
        if not check_ports:
            # Decide success right away. Re-testing the deadline first would turn a
            # connection made during the last allowed poll into a failure.
            return True
        time.sleep(0.5)

    log.error(
        "Failed to check ports after %1.2f seconds",
        time.time() - checks_start_time,
    )
    return False
def run_start_checks(self, started_at, timeout_at):
    """
    Wait until every port returned by ``self.get_check_ports()`` accepts connections.

    Args:
        started_at: Not used by this method.
        timeout_at(float): Deadline, compared against ``time.time()``.

    Raises:
        FactoryNotStarted: Raised when ``self.is_running()`` returns a false value
            while ports are still pending.

    Returns:
        bool: ``True`` when there are no ports to check or all of them became
        connectable, ``False`` when the deadline expired first.
    """
    check_ports = set(self.get_check_ports())
    if not check_ports:
        log.debug("No ports to check connection to for %s", self)
        return True

    # ``check_ports`` is drained while polling; keep a copy for the final log message
    all_ports = set(check_ports)
    checks_start_time = time.time()
    while time.time() <= timeout_at:
        if not self.is_running():
            raise FactoryNotStarted("{} is no longer running".format(self))
        check_ports -= ports.get_connectable_ports(check_ports)
        if not check_ports:
            # Decide success right away. Re-testing the deadline first would turn a
            # connection made during the last allowed poll into a failure.
            log.debug("Successfuly connected to all ports(%s) for %s", all_ports, self)
            return True
        time.sleep(0.5)

    log.error(
        "Failed to check ports after %1.2f seconds for %s",
        time.time() - checks_start_time,
        self,
    )
    return False
def start_factory(factory_class, start_timeout=10, max_attempts=3, event_listener=None, salt_factories=None, **factory_class_kwargs): """ Returns a running factory Args: cli_script_name(str): The CLI script which starts the daemon daemon_class(:py:class:`~saltfactories.utils.processes.bases.FactoryDaemonScriptBase`): The class to use to instantiate the factory instance. start_timeout(int): The amount of time, in seconds, to wait, until a factory is considered as not started. max_attempts(int): How many times to attempt to start the daemon in case of failure event_listener(:py:class:`~saltfactories.utils.event_listener.EventListener`): An instance of :py:class:`~saltfactories.utils.event_listener.EventListener` in case the daemon is a salt daemon. **daemon_class_kwargs(dict): Keyword arguments to pass to the ``daemon_class`` when instantiating it. Raises: FactoryNotStarted: Raised when a factory fails to start or when the code used to confirm that the daemon is up also fails. RuntimeError: `RuntimeError` is raised when a factory defines :py:meth:`~saltfactories.utils.processes.salts.SaltDaemonScriptBase.get_check_events` but no ``event_listener`` argument was passed. Returns: An instance of the ``factory_class``, which is a subclass of :py:class:`~saltfactories.utils.processes.bases.FactoryDaemonScriptBase` """ attempts = 1 checks_start_time = time.time() while attempts <= max_attempts: # pylint: disable=too-many-nested-blocks factory = factory_class(**factory_class_kwargs) log.info("Starting %s. 
Attempt: %s", factory, attempts) start_time = time.time() checks_expire_time = start_time + start_timeout factory.start() attempts += 1 if factory.is_running(): try: try: check_ports = set(factory.get_check_ports()) if check_ports: log.debug("Checking %s for connectable ports: %s", factory, check_ports) except AttributeError: check_ports = False try: check_events = set(factory.get_check_events()) if not event_listener: factory.terminate() raise RuntimeError( "Factory {} want's to have events checked but no 'event_listener' was " "passed to start_daemon()".format(factory)) log.debug("Checking %s for event patterns: %s", factory, check_events) except AttributeError: check_events = False try: extra_checks_method = factory.run_extra_checks extra_checks_passed = False if not salt_factories: raise RuntimeError( "Factory {} defines the run_extra_checks method but no 'salt_factories' was " "passed to start_daemon()".format(factory)) except AttributeError: extra_checks_method = False extra_checks_passed = True all_checks_passed = False while time.time() <= checks_expire_time: if not factory.is_running(): # If meanwhile the factory dies, break the loop break if not check_ports and not check_events and extra_checks_passed: # If either there are no ports and no events to check, or, # they've all been checked, break the loop all_checks_passed = True break if check_ports: check_ports -= ports.get_connectable_ports(check_ports) if check_events: check_events -= event_listener.get_events( check_events, after_time=start_time) if extra_checks_method and not check_events and not check_events: # Only run the extra checks after check_events and check_ports are # checked since those likely involve shelling out extra_checks_passed = extra_checks_method( salt_factories) # Let's not peg the CPU time.sleep(0.5) if all_checks_passed is False: result = factory.terminate() if attempts >= max_attempts: raise FactoryNotStarted( "The {} factory has failed to confirm running status after {} 
attempts, which " "took {:.2f} seconds({:.2f} seconds each)".format( factory, attempts, time.time() - checks_start_time, start_timeout, ), stdout=result.stdout, stderr=result.stderr, exitcode=result.exitcode, ) continue except FactoryNotStarted: raise except Exception as exc: # pylint: disable=broad-except log.exception("Exception caught on %s: %s", factory, exc, exc_info=True) result = factory.terminate() if attempts >= max_attempts: raise FactoryNotStarted( "The {} factory has failed to confirm running status after {} attempts and raised an " "exception: {}. Took {:.2f} seconds({:.2f} seconds each attempt)." .format( factory, attempts, str(exc), time.time() - start_time, start_timeout, ), stdout=result.stdout, stderr=result.stderr, exitcode=result.exitcode, exc=sys.exc_info(), ) # A little pause before retrying time.sleep(1) continue # A little breathing before returning the factory time.sleep(0.25) log.info( "The %s factory is running after %d attempts. Took %1.2f seconds", factory, attempts, time.time() - checks_start_time, ) break else: factory.terminate() # A little pause before retrying time.sleep(1) continue else: stderr = stdout = exitcode = None if factory is not None: result = factory.terminate() stderr = result.stderr stdout = result.stdout exitcode = result.exitcode raise FactoryNotStarted( "The {} factory has failed to confirm running status after {} attempts, which " "took {:.2f} seconds.".format(factory, attempts, time.time() - checks_start_time), stdout=stdout, stderr=stderr, exitcode=exitcode, ) return factory