def block_until_build_status(self, build_id, build_statuses, timeout=None, build_in_progress_callback=None):
    """
    Poll the build status until it matches one of a set of allowed statuses.

    :param build_id: The id of the build to wait for
    :type build_id: int
    :param build_statuses: A list of build statuses which we are waiting for.
    :type build_statuses: list[str]
    :param timeout: The maximum number of seconds to wait until giving up, or None for no timeout
    :type timeout: int | None
    :param build_in_progress_callback: A callback that will be called with the build data of every status
        response in which the build does not yet have one of the awaited statuses. This would be useful,
        for example, for logging build progress.
    :type build_in_progress_callback: callable
    :return: The value returned by ``poll.wait_for`` (presumably falsy when the timeout elapsed
        first -- confirm against ``poll``)
    """
    def has_awaited_status():
        build_data = self.get_build_status(build_id)['build']
        if build_data['status'] in build_statuses:
            return True
        if build_in_progress_callback:
            build_in_progress_callback(build_data)
        return False

    # Pass the poll outcome on to the caller; previously it was dropped, so giving up on a
    # timeout could not be told apart from the build actually reaching an awaited status.
    return poll.wait_for(has_awaited_status, timeout_seconds=timeout)
def test_build_status_returns_finished_after_all_subjobs_complete_and_slaves_finished(self):
    """
    A build whose three subjobs have all completed, whose post-build tasks have run and which has
    been finished must report ``BuildStatus.FINISHED``.
    """
    subjobs = self._create_subjobs(count=3)
    project_type = self._create_mock_project_type()
    slave = self._create_mock_slave(num_executors=3)
    build = Build(BuildRequest({}))
    job_config = self._create_job_config(self._FAKE_MAX_EXECUTORS)
    build.prepare(subjobs, project_type, job_config)
    # After allocation all three subjobs are "in progress".
    build.allocate_slave(slave)

    # Replace artifact creation with a mock so the call can be verified below.
    build._create_build_artifact = MagicMock()

    for completed_subjob in subjobs:
        build.mark_subjob_complete(completed_subjob.subjob_id())

    def postbuild_tasks_done():
        return build._postbuild_tasks_are_finished

    # Note: this was never a unit test! We have to wait for a thread to complete post build
    # actions here. TODO: Fix this
    poll.wait_for(postbuild_tasks_done, 5)

    # Artifact creation must have been triggered once all subjobs completed.
    build._create_build_artifact.assert_called_once_with()

    build.finish()
    final_status = build._status()

    self.assertTrue(build._subjobs_are_finished)
    self.assertTrue(build._postbuild_tasks_are_finished)
    self.assertTrue(build._teardowns_finished)
    self.assertEqual(final_status, BuildStatus.FINISHED)
def test_build_status_returns_finished_after_all_subjobs_complete_and_slaves_finished(self):
    """
    A build whose three subjobs have all completed, whose post-build tasks have run and which has
    been finished must report ``BuildStatus.FINISHED``.
    """
    subjobs = self._create_subjobs(count=3)
    project_type = self._create_mock_project_type()
    slave = self._create_mock_slave(num_executors=3)
    build = Build(BuildRequest({}))
    job_config = self._create_job_config()
    build.prepare(subjobs, project_type, job_config)
    # After allocation all three subjobs are "in progress".
    build.allocate_slave(slave)

    # Replace artifact creation with a mock so the call can be verified below.
    build._create_build_artifact = MagicMock()

    for completed_subjob in subjobs:
        build.mark_subjob_complete(completed_subjob.subjob_id())

    def postbuild_tasks_done():
        return build._postbuild_tasks_are_finished

    # Note: this was never a unit test! We have to wait for a thread to complete post build
    # actions here. TODO: Fix this
    poll.wait_for(postbuild_tasks_done, 5)

    # Artifact creation must have been triggered once all subjobs completed.
    build._create_build_artifact.assert_called_once_with()

    build.finish()
    final_status = build._status()

    self.assertTrue(build._subjobs_are_finished)
    self.assertTrue(build._postbuild_tasks_are_finished)
    self.assertTrue(build._teardowns_finished)
    self.assertEqual(final_status, BuildStatus.FINISHED)
def block_until_build_finished(self, build_id, timeout=None, build_in_progress_callback=None):
    """
    Poll the build status until the build is finished, errored or canceled, or until the timeout
    is reached.

    :param build_id: The id of the build to wait for
    :type build_id: int
    :param timeout: The maximum number of seconds to wait until giving up, or None for no timeout
    :type timeout: int | None
    :param build_in_progress_callback: A callback that will be called with the build data of every status
        response in which the build has not yet reached a terminal status. This would be useful, for
        example, for logging build progress.
    :type build_in_progress_callback: callable
    :return: The value returned by ``poll.wait_for`` (presumably falsy when the timeout elapsed
        first -- confirm against ``poll``)
    """
    terminal_statuses = (BuildStatus.FINISHED, BuildStatus.ERROR, BuildStatus.CANCELED)

    def is_build_finished():
        build_data = self.get_build_status(build_id)['build']
        if build_data['status'] in terminal_statuses:
            return True
        if build_in_progress_callback:
            build_in_progress_callback(build_data)
        return False

    # Pass the poll outcome on to the caller; previously it was dropped, so a timeout could not
    # be told apart from the build actually finishing.
    return poll.wait_for(is_build_finished, timeout_seconds=timeout)
def block_until_idle(self, timeout=None):
    """
    Poll the slave status until the slave reports no current build.

    :param timeout: The maximum number of seconds to wait until giving up, or None for no timeout
    :type timeout: int | None
    :return: The value returned by ``poll.wait_for`` (presumably falsy when the timeout elapsed
        first -- confirm against ``poll``)
    """
    def is_slave_idle():
        # The slave counts as idle when its status carries no current build id.
        return self.get_slave_status()['slave']['current_build_id'] is None

    # Pass the poll outcome on to the caller; previously it was dropped, so a timeout could not
    # be told apart from the slave actually becoming idle.
    return poll.wait_for(is_slave_idle, timeout_seconds=timeout)
def _validate_successful_deployment(self, master_service_url, slaves_to_validate):
    """
    Poll the master's slave API until either timeout or until all of the slaves have registered
    with the master. Logs an error and raises SystemExit(1) when registration times out or when
    the master cannot be contacted for the final check.

    :param master_service_url: the hostname:port for the running master service
    :type master_service_url: str
    :param slaves_to_validate: the list of slave hostnames (no ports) to deploy to
    :type slaves_to_validate: list[str]
    :raises SystemExit: if not all slaves registered within ``_SLAVE_REGISTRY_TIMEOUT_SEC`` seconds
    """
    master_api = UrlBuilder(master_service_url, BuildRunner.API_VERSION)
    slave_api_url = master_api.url('slave')
    network = Network()

    def all_slaves_registered():
        return len(self._non_registered_slaves(slave_api_url, slaves_to_validate, network)) == 0

    registered_in_time = wait_for(
        boolean_predicate=all_slaves_registered,
        timeout_seconds=self._SLAVE_REGISTRY_TIMEOUT_SEC,
        poll_period=1,
        exceptions_to_swallow=(requests.RequestException, requests.ConnectionError),
    )
    if registered_in_time:
        return

    # Timed out: query once more, outside the exception-swallowing poll, to report which slaves are missing.
    try:
        non_registered_slaves = self._non_registered_slaves(slave_api_url, slaves_to_validate, network)
    except (ConnectionError, requests.ConnectionError):
        # requests.ConnectionError does not derive from the builtin ConnectionError, so both are
        # named here; catching only the builtin would let a failed request escape as a traceback.
        self._logger.error('Error contacting {} on the master.'.format(slave_api_url))
        raise SystemExit(1)

    self._logger.error('Slave registration timed out after {} sec, with slaves {} missing.'.format(
        self._SLAVE_REGISTRY_TIMEOUT_SEC, ','.join(non_registered_slaves)))
    raise SystemExit(1)
def block_until_build_queue_empty(self, timeout=15):
    """
    Block until the master's ``/v1/queue`` response contains an empty ``queue`` list. Does nothing
    when there is no master. Connection errors and ValueErrors raised while polling are swallowed
    and the poll is retried.

    :param timeout: The maximum number of seconds to block before raising an exception.
    :type timeout: int
    :raises TestClusterTimeoutError: if the queue is not empty before the timeout
    """
    if self.master is None:
        return

    def queue_has_drained():
        response = requests.get('{}/v1/queue'.format(self.master.url))
        if response and response.ok:
            if len(response.json()['queue']) == 0:
                return True  # nothing left in the queue
        self._logger.info('Waiting on build queue to become empty.')
        return False

    drained = poll.wait_for(
        queue_has_drained,
        timeout_seconds=timeout,
        poll_period=1,
        exceptions_to_swallow=(requests.ConnectionError, ValueError),
    )
    if drained:
        return

    self._logger.error('Master queue did not become empty before timeout.')
    raise TestClusterTimeoutError('Master queue did not become empty before timeout.')
def _block_until_slaves_ready(self, timeout=15):
    """
    Block until every slave's url is responsive. Each poll checks the still-pending slaves in
    turn, drops the ones that respond and stops at the first one that does not.

    :param timeout: Max number of seconds to wait before raising an exception
    :type timeout: int
    :raises TestClusterTimeoutError: if some slaves are still unresponsive when the timeout elapses
    """
    pending_slaves = self.slaves.copy()  # shrinks as slaves become ready

    def no_slaves_pending():
        # Walk a snapshot so that removing from the pending collection is safe mid-iteration.
        for candidate in tuple(pending_slaves):
            if not self._is_url_responsive(candidate.url):
                return False
            pending_slaves.remove(candidate)
        return True

    if poll.wait_for(no_slaves_pending, timeout_seconds=timeout):
        return

    total = len(self.slaves)
    ready = total - len(pending_slaves)
    raise TestClusterTimeoutError(
        'All slaves did not start up before timeout. '
        '{} of {} started successfully.'.format(ready, total))
def block_until_build_has_status(
        self,
        build_id: int,
        build_statuses: List[str],
        timeout: int=30,
        build_in_progress_callback: Optional[Callable]=None,
) -> bool:
    """
    Wait for a build to reach any one of the given statuses by repeatedly fetching its status.

    :param build_id: The id of the build to wait for
    :param build_statuses: The statuses that end the wait
    :param timeout: The maximum number of seconds to wait until giving up, or None for no timeout
    :param build_in_progress_callback: Optional callable invoked with the build data each time the
        build is polled and does not yet have one of the awaited statuses (e.g. to log progress)
    :return: Whether the build had one of the specified statuses within the timeout
    """
    def reached_awaited_status():
        build_data = self.get_build_status(build_id)['build']
        still_waiting = build_data['status'] not in build_statuses
        if still_waiting and build_in_progress_callback:
            build_in_progress_callback(build_data)
        return not still_waiting

    return poll.wait_for(reached_awaited_status, timeout_seconds=timeout)
def _validate_successful_deployment(self, master_service_url, slaves_to_validate):
    """
    Poll the master's slave API until either timeout or until as many slaves are registered with
    the master as were asked for. Logs an error and raises SystemExit(1) when registration times
    out or when the master cannot be contacted for the final check.

    :param master_service_url: the hostname:port for the running master service
    :type master_service_url: str
    :param slaves_to_validate: the list of slave hostnames (no ports) to deploy to
    :type slaves_to_validate: list[str]
    :raises SystemExit: if registration did not complete within ``_SLAVE_REGISTRY_TIMEOUT_SEC`` seconds
    """
    master_api = UrlBuilder(master_service_url, BuildRunner.API_VERSION)
    slave_api_url = master_api.url('slave')
    network = Network()

    def all_slaves_registered():
        # NOTE(review): this compares counts only, not hostnames -- slaves registered with the
        # master that are not in slaves_to_validate would skew it. Confirm this is intended.
        return len(self._registered_slave_hostnames(slave_api_url, network)) == len(slaves_to_validate)

    registered_in_time = wait_for(
        boolean_predicate=all_slaves_registered,
        timeout_seconds=self._SLAVE_REGISTRY_TIMEOUT_SEC,
        poll_period=1,
        exceptions_to_swallow=(requests.RequestException, requests.ConnectionError),
    )
    if registered_in_time:
        return

    # Timed out: query once more, outside the exception-swallowing poll, to report which slaves are missing.
    try:
        registered_slaves = self._registered_slave_hostnames(slave_api_url, network)
        non_registered_slaves = self._non_registered_slaves(registered_slaves, slaves_to_validate)
    except (ConnectionError, requests.ConnectionError):
        # requests.ConnectionError does not derive from the builtin ConnectionError, so both are
        # named here; catching only the builtin would let a failed request escape as a traceback.
        self._logger.error('Error contacting {} on the master.'.format(slave_api_url))
        raise SystemExit(1)

    self._logger.error('Slave registration timed out after {} sec, with slaves {} missing.'.format(
        self._SLAVE_REGISTRY_TIMEOUT_SEC, ','.join(non_registered_slaves)))
    raise SystemExit(1)
def block_until_build_has_status(
        self,
        build_id: int,
        build_statuses: List[str],
        timeout: int = 30,
        build_in_progress_callback: Optional[Callable] = None,
) -> bool:
    """
    Wait for a build to reach any one of the given statuses by repeatedly fetching its status.

    :param build_id: The id of the build to wait for
    :param build_statuses: The statuses that end the wait
    :param timeout: The maximum number of seconds to wait until giving up, or None for no timeout
    :param build_in_progress_callback: Optional callable invoked with the build data each time the
        build is polled and does not yet have one of the awaited statuses (e.g. to log progress)
    :return: Whether the build had one of the specified statuses within the timeout
    """
    def reached_awaited_status():
        build_data = self.get_build_status(build_id)['build']
        still_waiting = build_data['status'] not in build_statuses
        if still_waiting and build_in_progress_callback:
            build_in_progress_callback(build_data)
        return not still_waiting

    return poll.wait_for(reached_awaited_status, timeout_seconds=timeout)
def block_until_idle(self, timeout=None) -> bool:
    """
    Wait until ``self.is_slave_idle`` reports true, polling it repeatedly.

    :param timeout: The maximum number of seconds to wait until giving up, or None for no timeout
    :type timeout: int | None
    :return: Whether the slave became idle during the timeout
    """
    became_idle = poll.wait_for(self.is_slave_idle, timeout_seconds=timeout)
    return became_idle
def block_until_n_slaves_dead(self, num_slaves, timeout):
    """
    Poll until exactly ``num_slaves`` of ``self.slaves`` report that they are not alive.

    :param num_slaves: The number of dead slaves to wait for
    :type num_slaves: int
    :param timeout: The maximum number of seconds to wait, passed to ``poll.wait_for``
    :type timeout: int | None
    :return: The value returned by ``poll.wait_for`` for the dead-slave check
    """
    def are_slaves_dead():
        dead_count = sum(1 for slave in self.slaves if not slave.is_alive())
        # The comparison result must be returned: the predicate previously fell off the end and
        # yielded None on every poll, so the wait could never succeed.
        return dead_count == num_slaves

    return poll.wait_for(are_slaves_dead, timeout_seconds=timeout)
def _block_until_file_present(self, file_name: str, timeout: int=None) -> bool:
    """
    Wait for a regular file to appear at the given path.

    :param file_name: Absolute path of the file to check
    :param timeout: The maximum number of seconds to wait until giving up, or None for no timeout
    :return: Whether the file is present
    """
    return poll.wait_for(lambda: os.path.isfile(file_name), timeout_seconds=timeout)
def _block_until_master_ready(self, timeout=10):
    """
    Block until the master's url is responsive, checking it repeatedly via
    ``self._is_url_responsive``.

    :param timeout: Max number of seconds to wait before raising an exception
    :type timeout: int
    :raises TestClusterTimeoutError: if the master is not responsive within the timeout
    """
    # Resolve the checker and the url once up front, then poll with them.
    url_is_responsive = self._is_url_responsive
    master_url = self.master.url

    def master_responds():
        return url_is_responsive(master_url)

    if not poll.wait_for(master_responds, timeout_seconds=timeout):
        raise TestClusterTimeoutError('Master service did not start up before timeout.')
def block_until_slave_offline(self, slave_id: int, timeout: int=None) -> bool:
    """
    Poll the status of a slave until it is no longer reported as alive.

    :param slave_id: The id of the slave to wait for
    :param timeout: The maximum number of seconds to wait until giving up, or None for no timeout
    :return: Whether the slave went offline during the timeout
    """
    def slave_reports_not_alive():
        return not self.get_slave_status(slave_id)['is_alive']

    went_offline = poll.wait_for(slave_reports_not_alive, timeout_seconds=timeout)
    return went_offline
def _block_until_file_present(self, file_name: str, timeout: int = None) -> bool:
    """
    Wait for a regular file to appear at the given path.

    :param file_name: Absolute path of the file to check
    :param timeout: The maximum number of seconds to wait until giving up, or None for no timeout
    :return: Whether the file is present
    """
    return poll.wait_for(lambda: os.path.isfile(file_name), timeout_seconds=timeout)
def block_until_slave_offline(self, slave_id: int, timeout: int = None) -> bool:
    """
    Poll the status of a slave until it is no longer reported as alive.

    :param slave_id: The id of the slave to wait for
    :param timeout: The maximum number of seconds to wait until giving up, or None for no timeout
    :return: Whether the slave went offline during the timeout
    """
    def slave_reports_not_alive():
        return not self.get_slave_status(slave_id)['is_alive']

    went_offline = poll.wait_for(slave_reports_not_alive, timeout_seconds=timeout)
    return went_offline
def block_until_n_slaves_marked_dead_in_master(self, num_slaves, timeout):
    """
    Poll the master's slave listing until exactly ``num_slaves`` entries are marked as not alive.

    :param num_slaves: The number of dead-marked entries to wait for
    :type num_slaves: int
    :param timeout: The maximum number of seconds to wait, passed to ``poll.wait_for``
    :type timeout: int | None
    :return: The value returned by ``poll.wait_for`` for the dead-marked check
    """
    def are_slaves_marked_dead_in_master():
        # NOTE(review): only the first element of each list-valued entry is inspected, and an
        # empty list would raise IndexError -- confirm against the shape of get_slaves().
        slaves_marked_dead = [
            slave for slave in self.master_api_client.get_slaves().values()
            if isinstance(slave, list) and not slave[0].get('is_alive')
        ]
        # The comparison result must be returned: the predicate previously fell off the end and
        # yielded None on every poll, so the wait could never succeed.
        return len(slaves_marked_dead) == num_slaves

    return poll.wait_for(are_slaves_marked_dead_in_master, timeout_seconds=timeout)
def _block_until_process_is_killed(self, pid: int, timeout: int=None) -> bool:
    """
    Poll until no process with the given pid exists.

    :param pid: Process id of the process
    :param timeout: The maximum number of seconds to wait until giving up, or None for no timeout
    :return: Whether the process was found to be gone within the timeout
    """
    def is_process_gone():
        """
        Check for the absence of a unix pid.
        """
        # Sending signal 0 to a pid will raise an OSError exception if the pid is not running,
        # and do nothing otherwise.
        try:
            os.kill(pid, 0)
        except PermissionError:
            # The signal was refused, which means the process exists but belongs to someone else.
            return False
        except OSError:
            return True
        return False

    # The predicate must be true once the process is gone. It previously reported True while
    # the pid still existed, so the wait ended immediately for a live process.
    return poll.wait_for(is_process_gone, timeout_seconds=timeout)
def block_until_build_queue_empty(self, timeout=60):
    """
    Block until the master's queue url returns a response whose ``queue`` entry is empty.

    :param timeout: The maximum number of seconds to block before raising an exception.
    :type timeout: int
    :raises Exception: if the queue is not empty before the timeout
    """
    queue_url = UrlBuilder(self._master_url).url('queue')

    def queue_has_drained():
        response = requests.get(queue_url)
        if not (response and response.ok):
            return False
        payload = response.json()
        # A response without a 'queue' key never counts as empty.
        return 'queue' in payload and len(payload['queue']) == 0

    drained = poll.wait_for(queue_has_drained, timeout, 0.5)
    if not drained:
        raise Exception('Master service did not become idle before timeout.')
def _block_until_process_is_killed(self, pid: int, timeout: int = None) -> bool:
    """
    Poll until no process with the given pid exists.

    :param pid: Process id of the process
    :param timeout: The maximum number of seconds to wait until giving up, or None for no timeout
    :return: Whether the process was found to be gone within the timeout
    """
    def is_process_gone():
        """
        Check for the absence of a unix pid.
        """
        # Sending signal 0 to a pid will raise an OSError exception if the pid is not running,
        # and do nothing otherwise.
        try:
            os.kill(pid, 0)
        except PermissionError:
            # The signal was refused, which means the process exists but belongs to someone else.
            return False
        except OSError:
            return True
        return False

    # The predicate must be true once the process is gone. It previously reported True while
    # the pid still existed, so the wait ended immediately for a live process.
    return poll.wait_for(is_process_gone, timeout_seconds=timeout)
def _block_until_slaves_ready(self, timeout=15):
    """
    Block until every slave's url is responsive. Each poll checks the still-pending slaves in
    turn, drops the ones that respond and stops at the first one that does not.

    :param timeout: Max number of seconds to wait before raising an exception
    :type timeout: int
    :raises TestClusterTimeoutError: if some slaves are still unresponsive when the timeout elapses
    """
    pending_slaves = self.slaves.copy()  # shrinks as slaves become ready

    def no_slaves_pending():
        # Walk a snapshot so that removing from the pending collection is safe mid-iteration.
        for candidate in tuple(pending_slaves):
            if not self._is_url_responsive(candidate.url):
                return False
            pending_slaves.remove(candidate)
        return True

    if poll.wait_for(no_slaves_pending, timeout_seconds=timeout):
        return

    total = len(self.slaves)
    ready = total - len(pending_slaves)
    raise TestClusterTimeoutError('All slaves did not start up before timeout. '
                                  '{} of {} started successfully.'.format(ready, total))