def wait_for_ready_nodes(
        self,
        count: int,
        timeout: int = None,
        interval: Union[int, float] = 1,
) -> None:
    """Block until the cluster has at least ``count`` ready nodes.

    Notes:
        Only meaningful for clusters that auto-scale their nodes; this
        method never creates or deletes nodes itself.

    Args:
        count: The minimum number of ready nodes to wait for.
        timeout: The maximum time to wait, in seconds.
        interval: The time, in seconds, to sleep between successive
            checks of the node count.
    """

    def enough_nodes_ready(required):
        # Query readiness of every node returned by ``get_nodes`` and
        # tally the ones that reported ``True``.
        readiness = [node.is_ready() for node in self.get_nodes().values()]
        return readiness.count(True) >= required

    nodes_condition = Condition(
        f'wait for {count} nodes',
        enough_nodes_ready,
        count,
    )

    utils.wait_for_condition(
        condition=nodes_condition,
        timeout=timeout,
        interval=interval,
    )
def wait_until_ready(self, timeout=None, interval=1, fail_on_api_error=False):
    """Block until ``self.is_ready`` reports that the resource is ready.

    Args:
        timeout (int): Upper bound, in seconds, on how long to wait for
            the resource to become ready. When left unspecified the wait
            is unbounded. When given and reached or exceeded, a
            TimeoutError is raised.
        interval (int|float): Seconds to wait between readiness checks.
        fail_on_api_error (bool): Whether an API error should abort the
            wait. API errors can occur for many reasons, e.g. 'resource
            not found' while a resource is still starting or restarting,
            so they are generally not treated as failures while waiting
            for readiness. (default: False)

    Raises:
        TimeoutError: The specified timeout was exceeded.
    """
    object_ready = condition.Condition(
        'api object ready',
        self.is_ready,
    )

    utils.wait_for_condition(
        condition=object_ready,
        timeout=timeout,
        interval=interval,
        fail_on_api_error=fail_on_api_error,
    )
def wait_until_created(
        obj: objects.ApiObject,
        timeout: int = None,
        interval: Union[int, float] = 1,
) -> None:
    """Wait until the specified object has been created.

    Creation is judged by refreshing the object (e.g. getting it): if the
    refresh succeeds the object is considered created, and if it raises an
    error the object is considered not yet created.

    Args:
        obj: The ApiObject to wait on.
        timeout: The maximum time to wait, in seconds.
        interval: The time, in seconds, to sleep before re-checking the
            created state of the object.
    """

    def check_created(api_obj):
        try:
            api_obj.refresh()
        except Exception:
            # Any ordinary error from the refresh means "not created yet".
            # ``Exception`` (rather than a bare ``except``) is caught so that
            # KeyboardInterrupt and SystemExit still propagate and the wait
            # can be interrupted.
            return False
        return True

    wait_condition = Condition(
        f'wait for {type(obj).__name__}:{obj.name} to be created',
        check_created,
        obj,
    )

    utils.wait_for_condition(
        condition=wait_condition,
        timeout=timeout,
        interval=interval,
    )
def wait_for_registered(
        self,
        timeout: int = None,
        interval: Union[int, float] = 1,
) -> None:
    """Block until every pre-registered object is ready on the cluster.

    Objects become pre-registered with the test client when they are
    given to the test through the ``applymanifests`` pytest marker: the
    marker loads the manifest, adds the object to the cluster and
    registers it with the test client. This method returns once all of
    those objects report ready within the same check.

    Args:
        timeout: The maximum time to wait, in seconds.
        interval: The time, in seconds, to sleep between checks of the
            pre-registered objects' ready state.
    """

    def all_registered_ready():
        # Stops at the first object that is not ready.
        return all(item.is_ready() for item in self.pre_registered)

    registered_condition = Condition(
        'wait for pre-registered objects to be ready',
        all_registered_ready,
    )

    utils.wait_for_condition(
        condition=registered_condition,
        timeout=timeout,
        interval=interval,
    )
def wait_until_containers_start(self, timeout: int = None) -> None:
    """Block until every container in the Pod has started.

    This covers pulling the images as well as creating and starting the
    containers, and returns once all of the Pod's containers have been
    started. Note that this is not the same as waiting for readiness: a
    container is not necessarily ready right after it has started.

    Args:
        timeout: Upper bound, in seconds, on how long to wait for the
            Pod's containers to start. When left unspecified the wait is
            unbounded. When given and reached or exceeded, a TimeoutError
            is raised.

    Raises:
        TimeoutError: The specified timeout was exceeded.
    """
    containers_started = condition.Condition(
        'all pod containers started',
        self.containers_started,
    )

    # The state is re-checked at a fixed one second interval.
    utils.wait_for_condition(
        condition=containers_started,
        timeout=timeout,
        interval=1,
    )
def wait_until_job_completes(
        self,
        timeout: int = None,
        interval: Union[int, float] = 5,
        fail_on_api_error: bool = False,
) -> None:
    """Block until the orbit job finishes.

    The job may end in either a completed or a failed state; the check
    itself is delegated to ``self.is_complete``.

    Args:
        timeout: Upper bound, in seconds, on how long to wait for the job
            to finish. When left unspecified the wait is unbounded. When
            given and reached or exceeded, a TimeoutError is raised.
        interval: Seconds to wait between checks of the job status.
        fail_on_api_error: Whether an API error should abort the wait.
            API errors can occur for many reasons, e.g. 'resource not
            found' while a resource is still starting or restarting, so
            they are generally not treated as failures while waiting.

    Raises:
        TimeoutError: The specified timeout was exceeded.
    """
    job_finished = condition.Condition(
        "orbit job status check",
        self.is_complete,
    )

    # Poll until the Orbit job reports completion.
    utils.wait_for_condition(
        condition=job_finished,
        timeout=timeout,
        interval=interval,
        fail_on_api_error=fail_on_api_error,
    )
def wait_until_userspace_installation(
        self,
        timeout: int = None,
        interval: Union[int, float] = 5,
        fail_on_api_error: bool = False,
) -> None:
    """Block until ``self.is_complete`` reports the userspace as installed.

    Args:
        timeout: The maximum time to wait, in seconds. Forwarded as-is to
            ``utils.wait_for_condition``.
        interval: The time, in seconds, between successive status checks.
        fail_on_api_error: Forwarded as-is to
            ``utils.wait_for_condition``; presumably controls whether an
            API error aborts the wait -- confirm against that helper.
    """
    userspace_installed = condition.Condition(
        "orbit userspace status check",
        self.is_complete,
    )

    # Poll until the UserSpace based Helm charts have been installed.
    utils.wait_for_condition(
        condition=userspace_installed,
        timeout=timeout,
        interval=interval,
        fail_on_api_error=fail_on_api_error,
    )
def wait_for_load_balancer_ingress(self, timeout: int = None) -> None:
    """Block until the Ingress has been assigned a load balancer ingress.

    Args:
        timeout: Upper bound, in seconds, on how long to wait for the
            Ingress to be assigned an ingress. When left unspecified the
            wait is unbounded. When given and reached or exceeded, a
            TimeoutError is raised.

    Raises:
        TimeoutError: The specified timeout was exceeded.
    """
    ingress_assigned = condition.Condition(
        'Ingress has been assigned an ingress',
        self.has_load_balancer_ingress,
    )

    # The state is re-checked at a fixed one second interval.
    utils.wait_for_condition(
        condition=ingress_assigned,
        timeout=timeout,
        interval=1,
    )
def wait_until_deleted(self, timeout=None, interval=1):
    """Block until the resource no longer exists on the cluster.

    Args:
        timeout (int): Upper bound, in seconds, on how long to wait for
            the resource to be deleted from the cluster. When left
            unspecified the wait is unbounded. When given and reached or
            exceeded, a TimeoutError is raised.
        interval (int|float): Seconds to wait between checks of whether
            the object has been deleted.

    Raises:
        TimeoutError: The specified timeout was exceeded.
    """

    def is_gone():
        try:
            self.refresh()
        except ApiException as err:
            # A 404 'Not Found' means the object can no longer be found,
            # i.e. it has been deleted. Anything else is unexpected and
            # is logged and re-raised.
            if not (err.status == 404 and err.reason == 'Not Found'):
                log.error('error refreshing object state')
                raise err
            return True

        # The refresh succeeded, so the object still exists.
        return False

    gone_condition = condition.Condition(
        'api object deleted',
        is_gone,
    )

    utils.wait_for_condition(
        condition=gone_condition,
        timeout=timeout,
        interval=interval,
    )
def wait_for_conditions(
        *args: Condition,
        timeout: int = None,
        interval: Union[float, int] = 1,
        policy: Policy = Policy.ONCE,
        fail_on_api_error: bool = True,
) -> None:
    """Wait for all of the provided Conditions to be met.

    All Conditions must be met for this to unblock. If no Conditions are
    provided, this method will do nothing.

    Args:
        *args: Conditions to check.
        timeout: The maximum time to wait, in seconds, for the provided
            Conditions to be met. If all of the Conditions are not met within
            the given timeout, this will raise a TimeoutError. By default,
            there is no timeout so this will wait indefinitely.
        interval: The time, in seconds, to sleep before re-evaluating the
            conditions. Default: 1s
        policy: The condition checking policy that defines the checking
            behavior. With ``Policy.ONCE`` a Condition that has been met is
            dropped from later checks; with ``Policy.SIMULTANEOUS`` every
            Condition must be met within the same check. Default: ONCE
        fail_on_api_error: Fail the condition checks if a Kubernetes API
            error is incurred. An API error can be raised for a number of
            reasons, including a Pod being restarted and temporarily
            unavailable. Disabling this will cause those errors to be
            ignored, allowing the check to continue until timeout or
            resolution. (default: True).

    Raises:
        TimeoutError: The Conditions were not met within the specified
            timeout period.
        ValueError: Not all arguments are a Condition, or the given policy
            is not a recognized one (raised when the conditions are first
            checked).
    """
    # If no Conditions were given, there is nothing to do.
    if not args:
        return

    # If something was given, make sure they are all Conditions.
    if not all(isinstance(c, Condition) for c in args):
        raise ValueError('All arguments must be a Condition')

    # Work on a copy: the ONCE policy shrinks this list in place as
    # conditions are met, and the timeout message below reports whatever
    # is left in it.
    to_check = list(args)

    def condition_checker(conditions):
        # Check that the conditions were met according to the
        # condition checking policy.
        met, unmet = check_and_sort(*conditions)

        if policy == Policy.ONCE:
            log.info(f'check met: {met}')
            # Keep only the unmet conditions so that already-met ones are
            # not evaluated again on the next pass.
            conditions[:] = unmet
            return len(unmet) == 0

        elif policy == Policy.SIMULTANEOUS:
            return len(unmet) == 0 and len(met) == len(args)

        else:
            raise ValueError(
                f'Invalid condition policy specified: {policy}',
            )

    wait_condition = Condition(
        'wait for conditions',
        condition_checker,
        to_check,
    )

    try:
        utils.wait_for_condition(
            condition=wait_condition,
            timeout=timeout,
            interval=interval,
            fail_on_api_error=fail_on_api_error,
        )
    except TimeoutError:
        # If we time out here, we want to show all the conditions
        # that we weren't able to resolve in the error message, not
        # the 'wait for conditions' wrapper. ``from None`` keeps the
        # wrapper's own TimeoutError out of the reported traceback.
        raise TimeoutError(
            f'timed out while waiting for conditions to be met: {to_check}',
        ) from None