def _send_state_to_control_service(self, state_changes):
    """
    Send ``state_changes`` to the control service with ``NodeStateCommand``.

    When the remote call succeeds, ``state_changes`` is remembered as the
    last acknowledged state.  When it fails, the last acknowledged state is
    reset to ``None`` and the failure is handed to ``writeFailure``.

    :param state_changes: The local state changes to send.  A ``list`` copy
        of them is attached to the ``LOG_SEND_TO_CONTROL_SERVICE`` action.

    :return: Whatever ``addActionFinish()`` returns for the
        ``DeferredContext`` wrapping the remote call.
    """
    def on_acknowledged(_result):
        self._last_acknowledged_state = state_changes

    def on_send_failed(reason):
        # The control service may or may not have processed the update.
        # Forget the last acknowledged state so that the state is always
        # re-sent on the next iteration.
        self._last_acknowledged_state = None
        return reason

    log_action = LOG_SEND_TO_CONTROL_SERVICE(
        self.fsm.logger,
        connection=self.client,
        local_changes=list(state_changes),
    )
    with log_action.context():
        remote_call = self.client.callRemote(
            NodeStateCommand,
            state_changes=state_changes,
            eliot_context=log_action,
        )
        sending = DeferredContext(remote_call)
        sending.addCallbacks(on_acknowledged, on_send_failed)
        sending.addErrback(
            writeFailure,
            self.fsm.logger,
            u"Failed to send local state to control node.",
        )
        return sending.addActionFinish()
def _send_state_to_control_service(self, state_changes):
    """
    Send ``state_changes`` to the control service with ``NodeStateCommand``.

    When the remote call succeeds, ``state_changes`` is remembered as the
    last acknowledged state.  When it fails, the last acknowledged state is
    reset to ``None`` and the failure is handed to ``writeFailure``.

    :param state_changes: The local state changes to send.  A ``list`` copy
        of them is attached to the ``LOG_SEND_TO_CONTROL_SERVICE`` action.

    :return: Whatever ``addActionFinish()`` returns for the
        ``DeferredContext`` wrapping the remote call.
    """
    def on_acknowledged(_result):
        self._last_acknowledged_state = state_changes

    def on_send_failed(reason):
        # The control service may or may not have processed the update.
        # Forget the last acknowledged state so that the state is always
        # re-sent on the next iteration.
        self._last_acknowledged_state = None
        return reason

    log_action = LOG_SEND_TO_CONTROL_SERVICE(
        self.fsm.logger,
        connection=self.client,
        local_changes=list(state_changes),
    )
    with log_action.context():
        remote_call = self.client.callRemote(
            NodeStateCommand,
            state_changes=state_changes,
            eliot_context=log_action,
        )
        sending = DeferredContext(remote_call)
        sending.addCallbacks(on_acknowledged, on_send_failed)
        sending.addErrback(
            writeFailure,
            self.fsm.logger,
            u"Failed to send local state to control node.",
        )
        return sending.addActionFinish()
def proxy(upstream, endpoint, header):
    """
    Open a new connection to ``endpoint`` and start proxying between it and
    ``upstream``.

    ``upstream`` is paused before the connection attempt.  If the attempt
    fails, ``upstream`` is resumed and its connection aborted, and the
    failure is passed on.

    :param IProtocol upstream: A connected protocol.  All data received by
        this protocol from this point on will be sent along to another newly
        established connection.

    :param IStreamClientEndpoint endpoint: An endpoint to use to establish a
        new connection.  All data received over this connection will be sent
        along to the upstream connection.

    :param bytes header: Some extra data to write to the new downstream
        connection before proxying begins.

    :return: Whatever ``addActionFinish()`` returns for the connection
        attempt's ``DeferredContext``.
    """
    def abort_upstream(reason):
        upstream.transport.resumeProducing()
        upstream.transport.abortConnection()
        return reason

    def hand_over(downstream):
        # NOTE(review): this returns the ``DeferredContext`` wrapper itself
        # rather than its ``.result`` -- confirm that is intended.
        return DeferredContext(downstream.take_over(upstream, header))

    upstream.transport.pauseProducing()

    remote = upstream.transport.getPeer()
    # ``from`` is a Python keyword, so the field has to be passed through a
    # dict rather than as a literal keyword argument.
    fields = {u"from": (remote.host, remote.port)}
    action = start_action(action_type=u"grid-router:proxy", **fields)
    with action.context():
        connecting = DeferredContext(
            endpoint.connect(Factory.forProtocol(_Proxy))
        )
        connecting.addCallbacks(hand_over, abort_upstream)
        return connecting.addActionFinish()
def proxy(upstream, endpoint, header):
    """
    Open a new connection to ``endpoint`` and start proxying between it and
    ``upstream``.

    ``upstream`` is paused before the connection attempt.  If the attempt
    fails, ``upstream`` is resumed and its connection aborted, and the
    failure is passed on.

    :param IProtocol upstream: A connected protocol.  All data received by
        this protocol from this point on will be sent along to another newly
        established connection.

    :param IStreamClientEndpoint endpoint: An endpoint to use to establish a
        new connection.  All data received over this connection will be sent
        along to the upstream connection.

    :param bytes header: Some extra data to write to the new downstream
        connection before proxying begins.

    :return: Whatever ``addActionFinish()`` returns for the connection
        attempt's ``DeferredContext``.
    """
    def abort_upstream(reason):
        upstream.transport.resumeProducing()
        upstream.transport.abortConnection()
        return reason

    def hand_over(downstream):
        # NOTE(review): this returns the ``DeferredContext`` wrapper itself
        # rather than its ``.result`` -- confirm that is intended.
        return DeferredContext(downstream.take_over(upstream, header))

    upstream.transport.pauseProducing()

    remote = upstream.transport.getPeer()
    # ``from`` is a Python keyword, so the field has to be passed through a
    # dict rather than as a literal keyword argument.
    fields = {u"from": (remote.host, remote.port)}
    action = start_action(action_type=u"grid-router:proxy", **fields)
    with action.context():
        connecting = DeferredContext(
            endpoint.connect(Factory.forProtocol(_Proxy))
        )
        connecting.addCallbacks(hand_over, abort_upstream)
        return connecting.addActionFinish()
def create_stateful_container(self, node, count):
    """
    Configure a stateful container to mount a new dataset, and wait for
    it to be running.

    A dataset is created on ``node`` first; a container mounting that
    dataset is then created.  If container creation fails the dataset is
    deleted again before the failure is passed on.

    On success ``self.container_count`` is incremented.  On failure
    ``self.error_count`` is incremented and the failure is printed to
    stderr and logged; the errback does not return the failure.

    :param node: The node to create the dataset and container on.  Its
        ``uuid`` attribute is used.
    :param count: Recorded as the ``count`` field of the logged action.

    :return: Whatever ``addActionFinish()`` returns for the
        ``DeferredContext`` driving the operation.
    """
    action = start_action(
        action_type=u'flocker:benchmark:create_stateful_container',
        node=unicode(node.uuid),
        count=count
    )
    # ``action.context()`` runs the block inside the action without
    # finishing it on exit.  Using the action itself as the context manager
    # would finish it as soon as the block ends, i.e. before the Deferred
    # has fired, leaving ``addActionFinish()`` to finish it a second time.
    with action.context():
        d = DeferredContext(
            self.client.create_dataset(
                primary=node.uuid,
                maximum_size=self.max_size,
            )
        )

        def start_container(dataset):
            volume = MountedDataset(
                dataset_id=dataset.dataset_id,
                mountpoint=self.mountpoint
            )
            creating = create_container(
                self.reactor,
                control_service=self.client,
                node_uuid=node.uuid,
                name=unicode(uuid4()),
                image=self.image,
                volumes=[volume],
                timeout=self.timeout)

            # If container creation fails, delete dataset as well
            def delete_dataset(failure):
                deleting = self.client.delete_dataset(dataset.dataset_id)
                # A failed deletion is only logged; the original container
                # creation failure is what gets passed on either way.
                deleting.addErrback(write_failure)
                deleting.addBoth(lambda _ignore: failure)
                return deleting
            creating.addErrback(delete_dataset)

            return creating
        d.addCallback(start_container)

        def update_container_count(container):
            self.container_count += 1

        def update_error_count(failure):
            self.error_count += 1
            failure.printTraceback(sys.stderr)
            write_failure(failure)

        d.addCallbacks(update_container_count, update_error_count)

        return d.addActionFinish()
def create_stateful_container(self, node, count):
    """
    Configure a stateful container to mount a new dataset, and wait for
    it to be running.

    A dataset is created on ``node`` first; a container mounting that
    dataset is then created.  If container creation fails the dataset is
    deleted again before the failure is passed on.

    On success ``self.container_count`` is incremented.  On failure
    ``self.error_count`` is incremented and the failure is printed to
    stderr and logged; the errback does not return the failure.

    :param node: The node to create the dataset and container on.  Its
        ``uuid`` attribute is used.
    :param count: Recorded as the ``count`` field of the logged action.

    :return: Whatever ``addActionFinish()`` returns for the
        ``DeferredContext`` driving the operation.
    """
    action = start_action(
        action_type=u'flocker:benchmark:create_stateful_container',
        node=unicode(node.uuid),
        count=count
    )
    # ``action.context()`` runs the block inside the action without
    # finishing it on exit.  Using the action itself as the context manager
    # would finish it as soon as the block ends, i.e. before the Deferred
    # has fired, leaving ``addActionFinish()`` to finish it a second time.
    with action.context():
        d = DeferredContext(
            self.client.create_dataset(
                primary=node.uuid,
                maximum_size=self.max_size,
            )
        )

        def start_container(dataset):
            volume = MountedDataset(
                dataset_id=dataset.dataset_id,
                mountpoint=self.mountpoint
            )
            creating = create_container(
                self.reactor,
                control_service=self.client,
                node_uuid=node.uuid,
                name=unicode(uuid4()),
                image=self.image,
                volumes=[volume],
                timeout=self.timeout)

            # If container creation fails, delete dataset as well
            def delete_dataset(failure):
                deleting = self.client.delete_dataset(dataset.dataset_id)
                # A failed deletion is only logged; the original container
                # creation failure is what gets passed on either way.
                deleting.addErrback(write_failure)
                deleting.addBoth(lambda _ignore: failure)
                return deleting
            creating.addErrback(delete_dataset)

            return creating
        d.addCallback(start_container)

        def update_container_count(container):
            self.container_count += 1

        def update_error_count(failure):
            self.error_count += 1
            failure.printTraceback(sys.stderr)
            write_failure(failure)

        d.addCallbacks(update_container_count, update_error_count)

        return d.addActionFinish()
def output_START(self, context):
    """
    Create a node.

    The image metadata lookup and node creation run in a thread.  On success
    the node and its metadata are stored on ``self`` and ``InstanceStarted``
    is delivered to the state machine; on failure ``StartFailed`` is
    delivered instead.

    :param context: Unused by this method.
    """
    def create_in_thread(task_id):
        # Loading the image metadata happens separately from booting the
        # node, so the metadata recorded here may not match the image the
        # node actually ends up running (someone could replace the image
        # between this call and the node being started).  Generating images
        # is currently a manual step, so this probably won't happen very
        # often, and if it does there's a person there who can deal with
        # it.  It will also be resolved after the next node restart.  It
        # would be better to extract the image metadata from the booted
        # node, though.
        # FLOC-1905
        with Action.continue_task(task_id=task_id):
            self.image_metadata = self.driver.get_image_metadata()
            return self.driver.create()

    def on_started(node):
        self.node = node
        metadata = {
            'instance_id': node.id,
            'instance_name': node.name,
        }
        event = InstanceStarted(
            instance_id=node.id,
            image_metadata=self.image_metadata,
            instance_metadata=metadata,
        )
        self._fsm.receive(event)
        self.instance_metadata = metadata

    def on_failed(reason):
        # The exception is logged twice: here for Zulip ...
        log.err(reason, "while starting %s" % (self.identifier(),))
        self._fsm.receive(StartFailed())
        # ... and, by passing it on, for eliot.
        return reason

    action = start_action(
        action_type="flocker_bb:ec2:start",
        name=self.identifier())
    with action.context():
        in_thread = deferToThread(
            create_in_thread, action.serialize_task_id())
        starting = DeferredContext(in_thread)
        starting.addCallbacks(on_started, on_failed)
        # NOTE(review): the Deferred returned here is discarded, so a
        # failure passed on by ``on_failed`` has no further handler --
        # confirm that is intended.
        starting.addActionFinish()
def _retry_exception_async(reactor, f, steps=(0.1,) * 10):
    """
    Keep calling ``f`` until it stops raising.

    Each attempt's outcome is recorded and turned into a boolean for
    ``loop_until``: a failure becomes ``False`` so the loop keeps going, and
    a success becomes ``True`` so the loop stops even when the actual result
    is falsy.  Once ``loop_until`` is done, the recorded result is handed
    back if it succeeded, otherwise the most recently recorded failure.

    :param reactor: Passed through to ``loop_until``.
    :param f: The no-argument callable to retry.
    :param steps: Passed through to ``loop_until``.

    :return: Deferred that fires with whatever the function returns or the
        last raised exception if the function never succeeds.
    """
    # One-element lists act as mutable cells the nested functions can
    # assign into.
    last_failure = [None]
    last_result = [None]

    def handle_success(result):
        last_result[0] = result
        return True

    def handle_failure(failure):
        Message.log(
            message_type=(
                u"flocker:provision:libcloud:retry_exception:got_exception"
            ),
        )
        write_failure(failure)
        last_failure[0] = failure
        return False

    def make_call():
        attempt = DeferredContext(maybeDeferred(f))
        attempt.addCallbacks(handle_success, errback=handle_failure)
        return attempt.result

    def recorded_result(_ignored):
        return last_result[0]

    def recorded_failure(_ignored):
        return last_failure[0]

    action = start_action(
        action_type=u"flocker:provision:libcloud:retry_exception",
        function=function_serializer(f),
    )
    with action.context():
        looping = DeferredContext(loop_until(reactor, make_call, steps))
        looping.addCallbacks(recorded_result, errback=recorded_failure)
        return looping.addActionFinish()
def _retry_exception_async(reactor, f, steps=(0.1,) * 10):
    """
    Keep calling ``f`` until it stops raising.

    Each attempt's outcome is recorded and turned into a boolean for
    ``loop_until``: a failure becomes ``False`` so the loop keeps going, and
    a success becomes ``True`` so the loop stops even when the actual result
    is falsy.  Once ``loop_until`` is done, the recorded result is handed
    back if it succeeded, otherwise the most recently recorded failure.

    :param reactor: Passed through to ``loop_until``.
    :param f: The no-argument callable to retry.
    :param steps: Passed through to ``loop_until``.

    :return: Deferred that fires with whatever the function returns or the
        last raised exception if the function never succeeds.
    """
    # One-element lists act as mutable cells the nested functions can
    # assign into.
    last_failure = [None]
    last_result = [None]

    def handle_success(result):
        last_result[0] = result
        return True

    def handle_failure(failure):
        Message.log(
            message_type=(
                u"flocker:provision:libcloud:retry_exception:got_exception"
            ),
        )
        write_failure(failure)
        last_failure[0] = failure
        return False

    def make_call():
        attempt = DeferredContext(maybeDeferred(f))
        attempt.addCallbacks(handle_success, errback=handle_failure)
        return attempt.result

    def recorded_result(_ignored):
        return last_result[0]

    def recorded_failure(_ignored):
        return last_failure[0]

    action = start_action(
        action_type=u"flocker:provision:libcloud:retry_exception",
        function=function_serializer(f),
    )
    with action.context():
        looping = DeferredContext(loop_until(reactor, make_call, steps))
        looping.addCallbacks(recorded_result, errback=recorded_failure)
        return looping.addActionFinish()
def g(self, name, req):
    """
    Call the wrapped ``getChild`` inside a logged action and wrap the
    outcome in a ``DeferredResource``.

    :param name: Passed to ``getChild`` and recorded on the action.
    :param req: The request; its ``uri`` and ``method`` are recorded on the
        action and it is passed to ``getChild``.

    :return: A ``DeferredResource`` around the Deferred returned by
        ``addActionFinish()``.
    """
    # Binding the function to this instance gives it a better
    # fullyQualifiedName later on.  This is not necessary on Python 3.
    handler = getChild.__get__(self, type(self))

    action = start_action(
        action_type=u"allmydata:web:common-getChild",
        uri=req.uri,
        method=req.method,
        name=name,
        handler=fullyQualifiedName(handler),
    )
    with action.context():
        lookup = DeferredContext(maybeDeferred(handler, name, req))
        lookup.addCallbacks(
            _getChild_done,
            _getChild_failed,
            callbackArgs=(self,),
        )
        finished = lookup.addActionFinish()
    return DeferredResource(finished)
def make_call():
    """
    Call ``f`` and route its outcome through ``handle_success`` or
    ``handle_failure``.

    :return: The ``result`` attribute of the ``DeferredContext`` wrapping
        the call.
    """
    attempt = DeferredContext(maybeDeferred(f))
    attempt.addCallbacks(handle_success, errback=handle_failure)
    return attempt.result
def create_nodes(self, reactor, names, distribution, metadata=None):
    """
    Create nodes with the given names.

    :param reactor: The reactor.
    :param names: The names of the nodes.
    :type names: list of str
    :param str distribution: The name of the distribution to
        install on the nodes.
    :param dict metadata: Metadata to associate with the nodes.  ``None``
        (the default) is treated as an empty ``dict``.

    :return: A list of ``Deferred``s each firing with an INode
        when the corresponding node is created.   The list has
        the same order as :param:`names`.
    """
    if metadata is None:
        # Build a fresh dict per call so that no mutable default object is
        # shared between calls or with the driver it is handed to.
        metadata = {}

    size = self._default_size
    image_name = self._image_names[distribution]
    create_node_arguments = self._create_node_arguments()

    def handle_create_error(failure, name):
        # XXX This could be dangerous... What about a pre-existing
        # node with the same name (or even multiple nodes if the name
        # does not have to be unique)?
        Message.log(
            message_type="flocker:provision:libcloud:create_node:failed",
            node_name=name,
        )
        write_failure(failure)
        d = self._async_cleanup_node_named(reactor, name)
        # Once cleanup succeeds, pass the original creation failure on.
        d.addCallback(lambda _: failure)
        return d

    def make_node(node):
        public_address = _filter_ipv4(node.public_ips)[0]
        if isinstance(public_address, unicode):
            public_address = public_address.encode("ascii")

        if self._use_private_addresses:
            private_address = _filter_ipv4(node.private_ips)[0]
        else:
            private_address = None

        if isinstance(private_address, unicode):
            private_address = private_address.encode("ascii")

        Message.log(
            message_type="flocker:provision:libcloud:node_created",
            name=node.name,
            id=node.id,
            public_address=public_address,
            private_address=private_address,
        )
        return LibcloudNode(
            provisioner=self,
            node=node, address=public_address,
            private_address=private_address,
            distribution=distribution)

    action = start_action(
        action_type=u"flocker:provision:libcloud:create_nodes",
        instance_count=len(names),
        distribution=distribution,
        size=size,
        metadata=metadata,
    )
    with action.context():
        results = []
        for name in names:
            Message.log(
                message_type=u"flocker:provision:libcloud:creating_node",
                node_name=name,
            )
            d = maybeDeferred(
                self._driver.create_node,
                name=name,
                image=get_image(self._driver, image_name),
                size=get_size(self._driver, size),
                ex_keyname=self._keyname,
                ex_metadata=metadata,
                **create_node_arguments
            )
            d = DeferredContext(d)
            d.addCallbacks(
                lambda node: self._wait_until_running(reactor, node),
                errback=handle_create_error,
                errbackArgs=(name,),
            )
            d.addCallback(make_node)
            results.append(d.result)

        action_completion = DeferredContext(DeferredList(results))
        action_completion.addActionFinish()
        # Individual results and errors should be consumed by the caller,
        # so we can leave action_completion alone now.
        return results
def make_call():
    """
    Call ``f`` and route its outcome through ``handle_success`` or
    ``handle_failure``.

    :return: The ``result`` attribute of the ``DeferredContext`` wrapping
        the call.
    """
    attempt = DeferredContext(maybeDeferred(f))
    attempt.addCallbacks(handle_success, errback=handle_failure)
    return attempt.result
def create_nodes(self, reactor, names, distribution, metadata=None):
    """
    Create nodes with the given names.

    :param reactor: The reactor.
    :param names: The names of the nodes.
    :type names: list of str
    :param str distribution: The name of the distribution to
        install on the nodes.
    :param dict metadata: Metadata to associate with the nodes.  ``None``
        (the default) is treated as an empty ``dict``.

    :return: A list of ``Deferred``s each firing with an INode
        when the corresponding node is created.   The list has
        the same order as :param:`names`.
    """
    if metadata is None:
        # Build a fresh dict per call so that no mutable default object is
        # shared between calls or with the driver it is handed to.
        metadata = {}

    size = self._default_size
    image_name = self._image_names[distribution]
    create_node_arguments = self._create_node_arguments()

    def handle_create_error(failure, name):
        # XXX This could be dangerous... What about a pre-existing
        # node with the same name (or even multiple nodes if the name
        # does not have to be unique)?
        Message.log(
            message_type="flocker:provision:libcloud:create_node:failed",
            node_name=name,
        )
        write_failure(failure)
        d = self._async_cleanup_node_named(reactor, name)
        # Once cleanup succeeds, pass the original creation failure on.
        d.addCallback(lambda _: failure)
        return d

    def make_node(node):
        public_address = _filter_ipv4(node.public_ips)[0]
        if isinstance(public_address, unicode):
            public_address = public_address.encode("ascii")

        if self._use_private_addresses:
            private_address = _filter_ipv4(node.private_ips)[0]
        else:
            private_address = None

        if isinstance(private_address, unicode):
            private_address = private_address.encode("ascii")

        Message.log(
            message_type="flocker:provision:libcloud:node_created",
            name=node.name,
            id=node.id,
            public_address=public_address,
            private_address=private_address,
        )
        return LibcloudNode(
            provisioner=self,
            node=node, address=public_address,
            private_address=private_address,
            distribution=distribution)

    action = start_action(
        action_type=u"flocker:provision:libcloud:create_nodes",
        instance_count=len(names),
        distribution=distribution,
        size=size,
        metadata=metadata,
    )
    with action.context():
        results = []
        for name in names:
            Message.log(
                message_type=u"flocker:provision:libcloud:creating_node",
                node_name=name,
            )
            d = maybeDeferred(
                self._driver.create_node,
                name=name,
                image=get_image(self._driver, image_name),
                size=get_size(self._driver, size),
                ex_keyname=self._keyname,
                ex_metadata=metadata,
                **create_node_arguments
            )
            d = DeferredContext(d)
            d.addCallbacks(
                lambda node: self._wait_until_running(reactor, node),
                errback=handle_create_error,
                errbackArgs=(name,),
            )
            d.addCallback(make_node)
            results.append(d.result)

        action_completion = DeferredContext(DeferredList(results))
        action_completion.addActionFinish()
        # Individual results and errors should be consumed by the caller,
        # so we can leave action_completion alone now.
        return results