def cleanup_cluster(client, timeout=None): """ Delete all containers and datasets in the given cluster. :param FlockerClient client: The API client instance for the cluster. :param timeout: A timeout in seconds for waiting until the deletions take effect if not ``None``, otherwise there is no waiting. :type timeout: int or None :returns: Deferred that fires when the clean up is complete if :param:`timeout` is not None, otherwise the Deferred fires when the deletion requests are aknowledged. """ containers_configuration = yield client.list_containers_configuration() results = [] for container in containers_configuration: print "deleting container", container.name results.append(client.delete_container(container.name)) yield gather_deferreds(results) datasets_configuration = yield client.list_datasets_configuration() results = [] for dataset in datasets_configuration: print "deleting dataset with id", dataset.dataset_id results.append(client.delete_dataset(dataset.dataset_id)) yield gather_deferreds(results) if timeout is not None: print "waiting for all containers to get deleted" yield loop_until( client._reactor, lambda: client.list_containers_state(). addCallback(lambda containers: not containers), repeat(1, timeout)) print "waiting for all datasets to get deleted" yield loop_until( client._reactor, lambda: client.list_datasets_state().addCallback( lambda datasets: not datasets), repeat(1, timeout))
def deploy_and_wait_for_creation(self):
    """
    Deploy the new configuration and wait until it has been created.

    ``self.deploy()`` is yielded first.  After that the dataset check and
    then the container check are each polled through ``loop_until`` with
    the retry schedule ``repeat(1, self.timeout)``, so this only finishes
    once all requested datasets and containers have been created.

    NOTE(review): the body yields Deferreds; presumably the method is
    wrapped by ``inlineCallbacks`` where it is defined -- confirm.
    """
    yield self.deploy()

    # Datasets are waited for before containers, one after the other.
    completion_checks = (
        self.is_datasets_deployment_complete,
        self.is_container_deployment_complete,
    )
    for is_complete in completion_checks:
        yield loop_until(
            self.reactor, is_complete, repeat(1, self.timeout)
        )
def wait_for_nodes(reactor, client, count):
    """
    Wait until nodes join the cluster.

    The node list is polled through ``loop_until`` with the retry schedule
    ``repeat(1, 120)``.  A failed poll is passed to ``write_failure`` and
    counts as "not ready yet" instead of aborting the wait.

    :param reactor: The reactor.
    :param flocker.apiclient.FlockerClient client: The client connected to
        the cluster (its control node).
    :param int count: The expected number of nodes in the cluster.

    :return: ``Deferred`` firing when the number of nodes in the cluster
        reaches the target.
    """
    def check_node_count(nodes):
        print("Waiting for nodes, "
              "got {} out of {}".format(len(nodes), count))
        return len(nodes) >= count

    def got_all_nodes():
        d = client.list_nodes()
        d.addCallback(check_node_count)
        # The errback must come after the callback (the same order
        # ``_configure`` uses).  Attached first, whatever it returns
        # (``None`` for Eliot's ``write_failure``) would be handed to
        # ``check_node_count`` and ``len()`` would raise ``TypeError``,
        # aborting the loop instead of retrying.
        d.addErrback(write_failure)
        return d

    return loop_until(reactor, got_all_nodes, repeat(1, 120))
def run_scenario(self, result):
    """
    Start the load loop and wait for the tolerated request rate.

    :param result: Not used by this method.

    :return: A Deferred that fires when the desired scenario is
        established (e.g. that a certain load is being applied).
    :raise RequestRateNotReached: if the target rate could not be
        reached.
    """
    self.loop.start(interval=1)

    def rate_is_tolerated():
        return self.rate_measurer.rate() >= self.rate_tolerated

    def rate_never_reached(failure):
        # Only a cancellation is translated; anything else propagates.
        failure.trap(CancelledError)
        raise RequestRateNotReached()

    def begin_monitoring(_rate_reached):
        # Monitoring starts only once the target rate has been reached.
        self.monitor_loop.start(interval=1)

    reaching_rate = loop_until(
        self.reactor, rate_is_tolerated, repeat(1)
    )
    # Bound the wait by ``self.timeout``; the errback below expects the
    # expiry to surface as a ``CancelledError``.
    timeout(self.reactor, reaching_rate, self.timeout)
    reaching_rate.addErrback(rate_never_reached)
    reaching_rate.addCallback(begin_monitoring)
    return reaching_rate
def stop(self):
    """
    Stop the scenario from being maintained by stopping all the loops
    that may be executing.

    After the loops are stopped, outstanding requests are waited for
    (bounded by ``self.timeout``), then the request cleanup is run
    whether or not the wait succeeded, and finally the metrics are
    collected.

    :return Deferred[Optional[Dict[unicode, Any]]]: Scenario metrics.
    """
    self.is_started = False
    if self.monitor_loop.running:
        self.monitor_loop.stop()

    if self.loop.running:
        self.loop.stop()

    outstanding_requests = self.rate_measurer.outstanding()

    if outstanding_requests > 0:
        msg = (
            "There are {num_requests} outstanding requests. "
            "Waiting {num_seconds} seconds for them to complete."
        ).format(
            num_requests=outstanding_requests,
            num_seconds=self.timeout
        )
        Message.log(key="outstanding_requests", value=msg)

    with start_action(
        action_type=u"flocker:benchmark:scenario:stop",
        scenario="request_load"
    ):
        def no_outstanding_requests():
            return self.rate_measurer.outstanding() == 0

        scenario_stopped = loop_until(self.reactor,
                                      no_outstanding_requests,
                                      repeat(1))
        timeout(self.reactor, scenario_stopped, self.timeout)
        scenario = DeferredContext(scenario_stopped)

        def handle_timeout(failure):
            failure.trap(CancelledError)
            # Re-read the counter: the value captured when ``stop`` began
            # is stale by the time the wait has timed out.
            msg = (
                "Force stopping the scenario. "
                "There are {num_requests} outstanding requests"
            ).format(
                num_requests=self.rate_measurer.outstanding()
            )
            Message.log(key="force_stop_request", value=msg)
        scenario.addErrback(handle_timeout)

        def scenario_cleanup(ignored):
            """
            Calls the scenario cleanup, and wraps it inside an eliot
            start action, so we can see the logs if something goes
            wrong within the cleanup.

            NOTE(review): the ``with`` block exits as soon as
            ``run_cleanup`` returns, not when its Deferred fires --
            confirm that is the intended action scope.

            :return Deferred: that will fire once the cleanup has been
                completed
            """
            with start_action(
                action_type=u"flocker:benchmark:scenario:cleanup",
                scenario="request_load"
            ):
                return self.request.run_cleanup()
        # ``addBoth``: the cleanup runs on success and on failure alike.
        scenario.addBoth(scenario_cleanup)

        def return_metrics(_ignore):
            return self.rate_measurer.get_metrics()
        scenario.addCallback(return_metrics)

        return scenario.addActionFinish()
def start(self):
    """
    Start the load loop and wait until the requested rate is measured.

    :return: A Deferred that fires when the desired scenario is
        established (e.g. that a certain load is being applied).  If the
        wait is cancelled the Deferred fails with
        ``RequestRateNotReached``.
    """
    self.loop.start(interval=1)

    def rate_is_reached():
        return self.rate_measurer.rate() >= self.request_rate

    def rate_never_reached(failure):
        # Only a cancellation is translated; anything else propagates.
        failure.trap(CancelledError)
        raise RequestRateNotReached

    def begin_monitoring(_rate_reached):
        # Monitoring starts only once the target rate has been reached.
        self.monitor_loop.start(interval=1)

    reaching_rate = loop_until(
        self.reactor, rate_is_reached, repeat(1)
    )
    # Bound the wait by ``self.timeout``; the errback below expects the
    # expiry to surface as a ``CancelledError``.
    timeout(self.reactor, reaching_rate, self.timeout)
    reaching_rate.addErrback(rate_never_reached)
    reaching_rate.addCallback(begin_monitoring)
    return reaching_rate
def loop_until_state_found(reactor, get_states, state_matches, timeout):
    """
    Poll for states until one of them satisfies ``state_matches``.

    :param reactor: Passed through to ``loop_until`` and ``_timeout``.
    :param get_states: Callable returning a Deferred firing with a list of
        states.
    :param state_matches: Callable that accepts a state parameter, and
        returns a boolean indicating whether the state matches.
    :param timedelta timeout: Maximum time to wait for state to be found.

    :return Deferred[Any]: The matching state.
    """
    def first_match(candidates):
        # ``None`` (no match in this batch) makes the loop poll again.
        return next(
            (candidate for candidate in candidates
             if state_matches(candidate)),
            None
        )

    def poll_states():
        polling = get_states()
        polling.addCallback(first_match)
        return polling

    searching = loop_until(reactor, poll_states)
    _timeout(reactor, searching, timeout.total_seconds())
    return searching
def loop_until_state_found(reactor, get_states, state_matches, timeout):
    """
    Poll for states until one of them satisfies ``state_matches``.

    :param IReactorTime reactor: Twisted Reactor.
    :param get_states: Callable returning a Deferred firing with a list of
        states.
    :param state_matches: Callable that accepts a state parameter, and
        returns a boolean indicating whether the state matches.
    :param timedelta timeout: Maximum time to wait for state to be found.

    :return Deferred[Any]: The matching state.
    """
    def pick_matching_state(available):
        # Yields the first matching state, or ``None`` so that the loop
        # keeps polling.
        matching = (state for state in available if state_matches(state))
        return next(matching, None)

    def look_for_state():
        lookup = get_states()
        lookup.addCallback(pick_matching_state)
        return lookup

    found = loop_until(reactor, look_for_state)
    _timeout(reactor, found, timeout.total_seconds())
    return found
def _wait_for_postgres(self, server_ip):
    """
    Try to connect to the PostgreSQL server at ``server_ip`` once per
    second until the server responds.

    :param bytes server_ip: The IP address of the PostgreSQL server.

    :returns: The result of the query if the query succeeds.
    :raises: LoopExceeded if the query does not succeed after 10
        connection attempts.
    """
    def retry_on_bad_connection(failure):
        failure.trap(ProcessTerminated)
        # psql returns 0 to the shell if it finished normally, 1 if a
        # fatal error of its own occurs (e.g. out of memory, file not
        # found), 2 if the connection to the server went bad and the
        # session was not interactive, and 3 if an error occurred in a
        # script and the variable ON_ERROR_STOP was set.
        # http://www.postgresql.org/docs/9.3/static/app-psql.html
        if failure.value.exitCode != 2:
            # Not a connection problem: let the failure propagate.
            return failure
        # Connection problem: report "not ready" so the loop retries.
        return False

    def query_server():
        querying = remote_postgres(
            self.client_node_ip, server_ip, 'SELECT 1'
        )
        querying.addErrback(retry_on_bad_connection)
        return querying

    return loop_until(reactor, query_server, repeat(1, 10))
def wait_for_stack_status(stack_id, target_status, aws_config):
    """
    Poll a CloudFormation stack until it reports the wanted status.

    Each poll logs the current and the target status.  Polling goes
    through ``loop_until`` with the retry schedule ``repeat(10, 120)``.

    :param unicode stack_id: The AWS cloudformation stack ID.
    :param unicode target_status: The desired stack status.
    :param dict aws_config: environment variables to be merged with the
        current process environment before running the ``aws``
        sub-command.

    :returns: A ``Deferred`` which fires when the stack has
        ``target_status``.
    """
    def report_if_target_reached():
        report = get_stack_report(stack_id, aws_config)
        status_now = report['StackStatus']
        Message.log(
            function='wait_for_stack_status',
            stack_id=stack_id,
            target_status=target_status,
            current_status=status_now
        )
        if status_now != target_status:
            # Not there yet: a ``None`` result makes the loop poll again.
            return None
        return report

    return loop_until(reactor, report_if_target_reached, repeat(10, 120))
def run_scenario(self, dataset):
    """
    Start the write load against ``dataset`` and wait for the target rate.

    :param dataset: `Dataset` we will use to run the write scenario.

    :return: A `Deferred` that fires when the desired scenario is
        established (e.g. that a certain load is being applied).  If the
        wait is cancelled the Deferred fails with
        ``WRequestRateNotReached``.
    """
    # The first thing to do before actually running the scenario is to
    # record the dataset_id: the write requests that generate the load
    # need it.
    self.dataset_id = dataset.dataset_id
    self.loop.start(interval=1)

    def rate_is_reached():
        return self.rate_measurer.rate() >= self.request_rate

    def rate_never_reached(failure):
        # Only a cancellation is translated; anything else propagates.
        failure.trap(CancelledError)
        raise WRequestRateNotReached

    def begin_monitoring(_rate_reached):
        # Monitoring starts only once the target rate has been reached.
        self.monitor_loop.start(interval=1)

    # Loop until the expected rate is reached, bounded by
    # ``self.timeout``.
    reaching_rate = loop_until(
        self.reactor, rate_is_reached, repeat(1)
    )
    timeout(self.reactor, reaching_rate, self.timeout)
    reaching_rate.addErrback(rate_never_reached)
    reaching_rate.addCallback(begin_monitoring)
    return reaching_rate
def cleanup_cluster(client, timeout=None): """ Delete all containers and datasets in the given cluster. :param FlockerClient client: The API client instance for the cluster. :param timeout: A timeout in seconds for waiting until the deletions take effect if not ``None``, otherwise there is no waiting. :type timeout: int or None :returns: Deferred that fires when the clean up is complete if :param:`timeout` is not None, otherwise the Deferred fires when the deletion requests are aknowledged. """ containers_configuration = yield client.list_containers_configuration() results = [] for container in containers_configuration: print "deleting container", container.name results.append(client.delete_container(container.name)) yield gather_deferreds(results) datasets_configuration = yield client.list_datasets_configuration() results = [] for dataset in datasets_configuration: print "deleting dataset with id", dataset.dataset_id results.append(client.delete_dataset(dataset.dataset_id)) yield gather_deferreds(results) if timeout is not None: print "waiting for all containers to get deleted" yield loop_until( client._reactor, lambda: client.list_containers_state().addCallback( lambda containers: not containers ), repeat(1, timeout) ) print "waiting for all datasets to get deleted" yield loop_until( client._reactor, lambda: client.list_datasets_state().addCallback( lambda datasets: not datasets ), repeat(1, timeout) )
def _configure(reactor, cluster, configuration):
    """
    Push a deployment configuration to a cluster once its nodes are up.

    The control node's ``/state/nodes`` endpoint is polled (retry
    schedule ``repeat(1, 300)``) until it lists at least as many nodes
    as ``cluster.agent_nodes``.  Then the JSON-encoded configuration is
    posted to ``/configuration/_compose``.

    :param reactor: The reactor to use.
    :param flocker.provision._common.Cluster cluster: The target cluster.
    :param dict configuration: The deployment configuration.

    :return Deferred: Deferred that fires when the configuration is pushed
        to the cluster's control agent.  A response whose code is not
        ``OK`` makes it fail with ``ResponseError``.
    """
    base_url = b"https://{}:{}/v1".format(
        cluster.control_node.address, REST_API_PORT
    )
    certificates = cluster.certificates_path
    cluster_certificate = certificates.child(b"cluster.crt")
    user_certificate = certificates.child(b"user.crt")
    user_private_key = certificates.child(b"user.key")
    payload = dumps(configuration)
    http = treq_with_authentication(
        reactor, cluster_certificate, user_certificate, user_private_key
    )

    def enough_nodes(nodes):
        return len(nodes) >= len(cluster.agent_nodes)

    def got_all_nodes():
        fetching = http.get(base_url + b"/state/nodes", persistent=False)
        fetching.addCallback(check_and_decode_json, OK)
        fetching.addCallback(enough_nodes)
        # Attached last so a failure in any step above is handed to
        # ``write_failure`` rather than propagating out of this poll.
        fetching.addErrback(write_failure, logger=None)
        return fetching

    def check_response(response):
        if response.code == OK:
            return None

        def raise_response_error(error):
            if isinstance(error, dict):
                error = error[u"description"] + u"\n"
            else:
                error = u"Unknown error: " + unicode(error) + "\n"
            raise ResponseError(response.code, error)

        # The error body has to be decoded before it can be reported.
        decoding = json_content(response)
        decoding.addCallback(raise_response_error)
        return decoding

    def push_configuration(_nodes_ready):
        posting = http.post(
            base_url + b"/configuration/_compose",
            data=payload,
            headers={b"content-type": b"application/json"},
            persistent=False
        )
        posting.addCallback(check_response)
        return posting

    nodes_ready = loop_until(reactor, got_all_nodes, repeat(1, 300))
    return nodes_ready.addCallback(push_configuration)
def stop(self):
    """
    Stop the scenario from being maintained by stopping all the loops
    that may be executing.

    After the loops are stopped, outstanding requests are waited for
    (bounded by ``self.timeout``) and then the metrics are collected.

    :return Deferred[Optional[Dict[unicode, Any]]]: Scenario metrics.
    """
    self.is_started = False
    if self.monitor_loop.running:
        self.monitor_loop.stop()

    if self.loop.running:
        self.loop.stop()

    outstanding_requests = self.rate_measurer.outstanding()

    if outstanding_requests > 0:
        msg = (
            "There are {num_requests} outstanding requests. "
            "Waiting {num_seconds} seconds for them to complete."
        ).format(
            num_requests=outstanding_requests,
            num_seconds=self.timeout
        )
        Message.log(key='outstanding_requests', value=msg)

    with start_action(
        action_type=u'flocker:benchmark:scenario:stop',
        scenario='request_load'
    ):
        def no_outstanding_requests():
            return self.rate_measurer.outstanding() == 0

        scenario_stopped = loop_until(self.reactor,
                                      no_outstanding_requests,
                                      repeat(1))
        timeout(self.reactor, scenario_stopped, self.timeout)
        scenario = DeferredContext(scenario_stopped)

        def handle_timeout(failure):
            failure.trap(CancelledError)
            # Re-read the counter: the value captured when ``stop`` began
            # is stale by the time the wait has timed out.
            msg = (
                "Force stopping the scenario. "
                "There are {num_requests} outstanding requests"
            ).format(
                num_requests=self.rate_measurer.outstanding()
            )
            Message.log(key='force_stop_request', value=msg)
        scenario.addErrback(handle_timeout)

        def return_metrics(_ignore):
            return self.rate_measurer.get_metrics()
        scenario.addCallback(return_metrics)

        return scenario.addActionFinish()
def stop(self):
    """
    Stop the scenario from being maintained by stopping all the loops
    that may be executing.

    After the loops are stopped, outstanding requests are waited for,
    bounded by ``self.timeout``.  A timed-out wait is logged and
    swallowed rather than reported as a failure.

    :return: A ``Deferred`` that fires when the scenario has stopped.
    """
    self.is_started = False
    if self.monitor_loop.running:
        self.monitor_loop.stop()

    if self.loop.running:
        self.loop.stop()

    outstanding_requests = self.rate_measurer.outstanding()

    if outstanding_requests > 0:
        msg = (
            "There are {num_requests} outstanding requests. "
            "Waiting {num_seconds} seconds for them to complete."
        ).format(
            num_requests=outstanding_requests,
            num_seconds=self.timeout
        )
        Message.log(key='outstanding_requests', value=msg)

    with start_action(
        action_type=u'flocker:benchmark:scenario:stop',
        scenario='request_load'
    ):
        def handle_timeout(failure):
            failure.trap(CancelledError)
            # Re-read the counter: the value captured when ``stop`` began
            # is stale by the time the wait has timed out.
            msg = (
                "Force stopping the scenario. "
                "There are {num_requests} outstanding requests"
            ).format(
                num_requests=self.rate_measurer.outstanding()
            )
            Message.log(key='force_stop_request', value=msg)

        def no_outstanding_requests():
            return self.rate_measurer.outstanding() == 0

        scenario_stopped = loop_until(self.reactor,
                                      no_outstanding_requests,
                                      repeat(1))
        timeout(self.reactor, scenario_stopped, self.timeout)
        scenario_stopped.addErrback(handle_timeout)

        scenario = DeferredContext(scenario_stopped)
        # ``addActionFinish`` hands back the wrapped Deferred, the same
        # way the other ``stop`` variants in this code base return it.
        return scenario.addActionFinish()
def stop(self):
    """
    Stop the scenario from being maintained by stopping all the loops
    that may be executing.

    After the loops are stopped, outstanding requests are waited for,
    bounded by ``self.timeout``.  A timed-out wait is logged and
    swallowed rather than reported as a failure.

    :return: A Deferred that fires when the scenario has stopped.
    """
    if self.monitor_loop.running:
        self.monitor_loop.stop()

    if self.loop.running:
        self.loop.stop()

    outstanding_requests = self.rate_measurer.outstanding()

    if outstanding_requests > 0:
        msg = (
            "There are {num_requests} outstanding requests. "
            "Waiting {num_seconds} seconds for them to complete."
        ).format(
            num_requests=outstanding_requests,
            num_seconds=self.timeout
        )
        Message.log(key='outstanding_requests', value=msg)

    with start_action(
        action_type=u'flocker:benchmark:scenario:stop',
        scenario='write_request_load'
    ):
        def handle_timeout(failure):
            failure.trap(CancelledError)
            # Re-read the counter: the value captured when ``stop`` began
            # is stale by the time the wait has timed out.
            msg = (
                "Force stopping the scenario. "
                "There are {num_requests} outstanding requests"
            ).format(
                num_requests=self.rate_measurer.outstanding()
            )
            Message.log(key='force_stop_request', value=msg)

        def no_outstanding_requests():
            return self.rate_measurer.outstanding() == 0

        scenario_stopped = loop_until(self.reactor,
                                      no_outstanding_requests,
                                      repeat(1))
        timeout(self.reactor, scenario_stopped, self.timeout)
        scenario_stopped.addErrback(handle_timeout)

        scenario = DeferredContext(scenario_stopped)
        # ``addActionFinish`` hands back the wrapped Deferred, the same
        # way the other ``stop`` variants in this code base return it.
        return scenario.addActionFinish()
def loop_until_state_found(reactor, get_states, state_matches):
    """
    Poll for states until one of them satisfies ``state_matches``.

    :param reactor: Passed through to ``loop_until``.
    :param get_states: Callable returning a Deferred firing with a list of
        states.
    :param state_matches: Callable that accepts a state parameter, and
        returns a boolean indicating whether the state matches.

    :return Deferred[Any]: The matching state.
    """
    def first_match(candidates):
        # ``None`` (no match in this batch) makes the loop poll again.
        return next(
            (candidate for candidate in candidates
             if state_matches(candidate)),
            None
        )

    def poll_states():
        polling = get_states()
        polling.addCallback(first_match)
        return polling

    return loop_until(reactor, poll_states)
def loop_until_container_removed(_ignore):
    """
    Poll ``container_removed(container)`` through ``loop_until``.

    :param _ignore: Unused; lets this serve as a Deferred callback.

    :return: Whatever ``loop_until`` returns for that predicate.
    """
    # Bind the container now so the predicate takes no arguments.
    is_removed = partial(container_removed, container)
    return loop_until(reactor, is_removed)
def stop(self):
    """
    Stop the scenario from being maintained by stopping all the loops
    that may be executing.

    After the loops are stopped, outstanding requests are waited for
    (bounded by ``self.timeout``), then the request cleanup is run
    whether or not the wait succeeded, and finally the metrics are
    collected.

    :return Deferred[Optional[Dict[unicode, Any]]]: Scenario metrics.
    """
    self.is_started = False
    if self.monitor_loop.running:
        self.monitor_loop.stop()

    if self.loop.running:
        self.loop.stop()

    outstanding_requests = self.rate_measurer.outstanding()

    if outstanding_requests > 0:
        msg = (
            "There are {num_requests} outstanding requests. "
            "Waiting {num_seconds} seconds for them to complete."
        ).format(
            num_requests=outstanding_requests,
            num_seconds=self.timeout
        )
        Message.log(key='outstanding_requests', value=msg)

    with start_action(
        action_type=u'flocker:benchmark:scenario:stop',
        scenario='request_load'
    ):
        def no_outstanding_requests():
            return self.rate_measurer.outstanding() == 0

        scenario_stopped = loop_until(self.reactor,
                                      no_outstanding_requests,
                                      repeat(1))
        timeout(self.reactor, scenario_stopped, self.timeout)
        scenario = DeferredContext(scenario_stopped)

        def handle_timeout(failure):
            failure.trap(CancelledError)
            # Re-read the counter: the value captured when ``stop`` began
            # is stale by the time the wait has timed out.
            msg = (
                "Force stopping the scenario. "
                "There are {num_requests} outstanding requests"
            ).format(
                num_requests=self.rate_measurer.outstanding()
            )
            Message.log(key='force_stop_request', value=msg)
        scenario.addErrback(handle_timeout)

        def scenario_cleanup(ignored):
            """
            Calls the scenario cleanup, and wraps it inside an eliot
            start action, so we can see the logs if something goes
            wrong within the cleanup.

            NOTE(review): the ``with`` block exits as soon as
            ``run_cleanup`` returns, not when its Deferred fires --
            confirm that is the intended action scope.

            :return Deferred: that will fire once the cleanup has been
                completed
            """
            with start_action(
                action_type=u'flocker:benchmark:scenario:cleanup',
                scenario='request_load'
            ):
                return self.request.run_cleanup()
        # ``addBoth``: the cleanup runs on success and on failure alike.
        scenario.addBoth(scenario_cleanup)

        def return_metrics(_ignore):
            return self.rate_measurer.get_metrics()
        scenario.addCallback(return_metrics)

        return scenario.addActionFinish()
def loop_until_converged(expected):
    """
    Poll ``self._converged(expected)`` through ``loop_until``.

    :param expected: Forwarded unchanged to ``self._converged``.

    :return: Whatever ``loop_until`` returns for that predicate.
    """
    # Bind the expected value now so the predicate takes no arguments.
    has_converged = partial(self._converged, expected)
    return loop_until(self.reactor, has_converged)
def _configure(reactor, cluster, configuration):
    """
    Configure the cluster with the given deployment configuration.

    First the control node's ``/state/nodes`` endpoint is polled (retry
    schedule ``repeat(1, 300)``) until it reports at least as many nodes
    as ``cluster.agent_nodes``.  Only then is the JSON-encoded
    configuration posted to ``/configuration/_compose``.

    :param reactor: The reactor to use.
    :param flocker.provision._common.Cluster cluster: The target cluster.
    :param dict configuration: The deployment configuration.

    :return Deferred: Deferred that fires when the configuration is pushed
        to the cluster's control agent.  A response whose code is not
        ``OK`` makes it fail with ``ResponseError``.
    """
    base_url = b"https://{}:{}/v1".format(
        cluster.control_node.address, REST_API_PORT
    )
    cert_dir = cluster.certificates_path
    cluster_crt = cert_dir.child(b"cluster.crt")
    user_crt = cert_dir.child(b"user.crt")
    user_key_path = cert_dir.child(b"user.key")
    encoded_configuration = dumps(configuration)
    client = treq_with_authentication(
        reactor, cluster_crt, user_crt, user_key_path
    )

    def all_agents_listed(nodes):
        return len(nodes) >= len(cluster.agent_nodes)

    def got_all_nodes():
        request = client.get(
            base_url + b"/state/nodes",
            persistent=False
        )
        request.addCallback(check_and_decode_json, OK)
        request.addCallback(all_agents_listed)
        # Attached last so a failure in any step above is handed to
        # ``write_failure`` rather than propagating out of this poll.
        request.addErrback(write_failure, logger=None)
        return request

    def fail_unless_ok(response):
        if response.code == OK:
            return None

        def report(error):
            if isinstance(error, dict):
                error = error[u"description"] + u"\n"
            else:
                error = u"Unknown error: " + unicode(error) + "\n"
            raise ResponseError(response.code, error)

        # The error body has to be decoded before it can be reported.
        body_read = json_content(response)
        body_read.addCallback(report)
        return body_read

    def do_configure(_):
        request = client.post(
            base_url + b"/configuration/_compose",
            data=encoded_configuration,
            headers={b"content-type": b"application/json"},
            persistent=False
        )
        request.addCallback(fail_unless_ok)
        return request

    waiting_for_nodes = loop_until(reactor, got_all_nodes, repeat(1, 300))
    return waiting_for_nodes.addCallback(do_configure)