def convergence_remove_server_from_group(log, transaction_id, server_id,
                                         replace, purge, group, state):
    """
    Remove a specific server from the group, optionally decrementing the
    desired capacity.

    The server may just be scheduled for deletion, or it may be evicted from
    the group by removing otter-specific metadata from the server.

    :param log: A bound logger
    :param bytes transaction_id: The transaction id for this operation.
    :param bytes server_id: The id of the server to be removed.
    :param bool replace: Should the server be replaced?
    :param bool purge: Should the server be deleted from Nova?
    :param group: The scaling group to remove a server from.
    :type group: :class:`~otter.models.interface.IScalingGroup`
    :param state: The current state of the group.
    :type state: :class:`~otter.models.interface.GroupState`

    :return: The updated state.
    :rtype: Effect of :class:`~otter.models.interface.GroupState`

    :raise: :class:`CannotDeleteServerBelowMinError` if the server cannot be
        deleted without replacement, and :class:`ServerNotFoundError` if
        there is no such server to be deleted.
    """
    effects = [_is_server_in_group(group, server_id)]
    if not replace:
        effects.append(_can_scale_down(group, server_id))

    # the (possibly) two checks can happen in parallel, but we want
    # ServerNotFoundError to take precedence over
    # CannotDeleteServerBelowMinError
    both_checks = yield parallel_all_errors(effects)
    for is_error, result in both_checks:
        if is_error:
            reraise(*result)

    # Remove the server
    if purge:
        eff = set_nova_metadata_item(server_id, *DRAINING_METADATA)
    else:
        eff = Effect(
            EvictServerFromScalingGroup(log=log,
                                        transaction_id=transaction_id,
                                        scaling_group=group,
                                        server_id=server_id))
    yield Effect(
        TenantScope(
            retry_effect(eff, retry_times(3),
                         exponential_backoff_interval(2)),
            group.tenant_id))

    if not replace:
        yield do_return(assoc_obj(state, desired=state.desired - 1))
    else:
        yield do_return(state)
def convergence_remove_server_from_group(
        log, transaction_id, server_id, replace, purge, group, state):
    """
    Remove a specific server from the group, optionally decrementing the
    desired capacity.

    The server may just be scheduled for deletion, or it may be evicted from
    the group by removing otter-specific metadata from the server.

    :param log: A bound logger
    :param bytes transaction_id: The transaction id for this operation.
    :param bytes server_id: The id of the server to be removed.
    :param bool replace: Should the server be replaced?
    :param bool purge: Should the server be deleted from Nova?
    :param group: The scaling group to remove a server from.
    :type group: :class:`~otter.models.interface.IScalingGroup`
    :param state: The current state of the group.
    :type state: :class:`~otter.models.interface.GroupState`

    :return: The updated state.
    :rtype: Effect of :class:`~otter.models.interface.GroupState`

    :raise: :class:`CannotDeleteServerBelowMinError` if the server cannot be
        deleted without replacement, and :class:`ServerNotFoundError` if
        there is no such server to be deleted.
    """
    effects = [_is_server_in_group(group, server_id)]
    if not replace:
        effects.append(_can_scale_down(group, server_id))

    # the (possibly) two checks can happen in parallel, but we want
    # ServerNotFoundError to take precedence over
    # CannotDeleteServerBelowMinError
    both_checks = yield parallel_all_errors(effects)
    for is_error, result in both_checks:
        if is_error:
            reraise(*result)

    # Remove the server
    if purge:
        eff = set_nova_metadata_item(server_id, *DRAINING_METADATA)
    else:
        eff = Effect(
            EvictServerFromScalingGroup(log=log,
                                        transaction_id=transaction_id,
                                        scaling_group=group,
                                        server_id=server_id))
    yield Effect(TenantScope(
        retry_effect(eff, retry_times(3), exponential_backoff_interval(2)),
        group.tenant_id))

    if not replace:
        yield do_return(assoc_obj(state, desired=state.desired - 1))
    else:
        yield do_return(state)
def _acquire_loop(self, blocking, timeout):
    acquired = yield self.is_acquired_eff()
    if acquired or not blocking:
        yield do_return(acquired)
    start = yield Effect(Func(time.time))
    while True:
        yield Effect(Delay(self._interval))
        if (yield self.is_acquired_eff()):
            yield do_return(True)
        if timeout is not None:
            now = yield Effect(Func(time.time))
            if now - start > timeout:
                raise LockTimeout("Failed to acquire lock on {} in {} seconds".format(self.path, now - start))
def step(state):
    yield display(render(state))
    try:
        user_input = yield Effect(Prompt("> "))
        cmd, arg = parse(user_input)
        result = dispatch(state, cmd, arg)
        yield display("Okay.")
        yield do_return(result)
    except (EOFError, KeyboardInterrupt):
        yield display("\nThanks for playing!")
        sys.exit(0)
    except Exception as e:
        yield display(str(e))
        yield do_return(state)
def _acquire_loop(self, blocking, timeout):
    acquired = yield self.is_acquired_eff()
    if acquired or not blocking:
        yield do_return(acquired)
    start = yield Effect(Func(time.time))
    while True:
        yield Effect(Delay(self._interval))
        if (yield self.is_acquired_eff()):
            yield do_return(True)
        if timeout is not None:
            now = yield Effect(Func(time.time))
            if now - start > timeout:
                raise LockTimeout(
                    "Failed to acquire lock on {} in {} seconds".format(
                        self.path, now - start))
def is_acquired_eff(self):
    """
    Effect implementation of ``is_acquired``.

    :return: ``Effect`` of ``bool``
    """
    if self._node is None:
        yield do_return(False)
    children = yield Effect(GetChildren(self.path))
    if not children:
        yield do_return(False)
    # The last 10 characters are sequence number as per
    # https://zookeeper.apache.org/doc/current/zookeeperProgrammers.html#Sequence+Nodes+--+Unique+Naming
    basename = self._node.rsplit("/")[-1]
    yield do_return(sorted(children, key=lambda c: c[-10:])[0] == basename)
def _execute_steps(steps):
    """
    Given a set of steps, executes them, logs the result, and returns the
    worst priority with a list of reasons for that result.

    :return: a tuple of (:class:`StepResult` constant, list of reasons)
    """
    if len(steps) > 0:
        results = yield steps_to_effect(steps)
        severity = [
            StepResult.FAILURE, StepResult.RETRY,
            StepResult.LIMITED_RETRY, StepResult.SUCCESS
        ]
        priority = sorted(results,
                          key=lambda (status, reasons): severity.index(status))
        worst_status = priority[0][0]
        results_to_log = [{
            'step': step,
            'result': result,
            'reasons': map(structure_reason, reasons)
        } for step, (result, reasons) in zip(steps, results)]
        reasons = reduce(operator.add,
                         (x[1] for x in results if x[0] == worst_status))
    else:
        worst_status = StepResult.SUCCESS
        results_to_log = reasons = []

    yield msg('execute-convergence-results',
              results=results_to_log,
              worst_status=worst_status.name)
    yield do_return((worst_status, reasons))
def handle_take(request):
    item_name = request.form['item_name']
    state = yield Effect(LoadGame())
    st = take(state, item_name)
    if st is not None:
        yield Effect(SaveGame(state=st))
    yield do_return(redirect(url_for('root')))
def handle_move(request):
    exit_name = request.form['exit_name']
    state = yield Effect(LoadGame())
    st = move(state, exit_name)
    if st is not None:
        yield Effect(SaveGame(state=st))
    yield do_return(redirect(url_for('root')))
def convergence_exec_data(tenant_id, group_id, now, get_executor):
    """
    Get data required while executing convergence
    """
    sg_eff = Effect(GetScalingGroupInfo(tenant_id=tenant_id,
                                        group_id=group_id))
    (scaling_group, manifest) = yield sg_eff

    group_state = manifest['state']
    launch_config = manifest['launchConfiguration']

    executor = get_executor(launch_config)

    resources = yield executor.gather(tenant_id, group_id, now)

    if group_state.status == ScalingGroupStatus.DELETING:
        desired_capacity = 0
    else:
        desired_capacity = group_state.desired
        yield executor.update_cache(scaling_group, now, **resources)

    desired_group_state = executor.get_desired_group_state(
        group_id, launch_config, desired_capacity)

    yield do_return(
        (executor, scaling_group, group_state, desired_group_state,
         resources))
def get_scaling_group_servers(tenant_id, group_id, now,
                              all_as_servers=get_all_scaling_group_servers,
                              all_servers=get_all_server_details,
                              cache_class=CassScalingGroupServersCache):
    """
    Get a group's servers, taken from the cache if it exists. Updates the
    cache from newly fetched servers if it is empty.

    # NOTE: This function takes tenant_id even though the whole effect is
    # scoped on the tenant because cache calls require tenant_id. Should
    # they also not take tenant_id and work on the scope?

    :return: Servers as list of dicts
    :rtype: Effect
    """
    cache = cache_class(tenant_id, group_id)
    cached_servers, last_update = yield cache.get_servers(False)
    if last_update is None:
        servers = (yield all_as_servers()).get(group_id, [])
    else:
        current = yield all_servers()
        servers = mark_deleted_servers(cached_servers, current)
        servers = list(filter(server_of_group(group_id), servers))
    yield do_return(servers)
def acquire_eff(self, blocking, timeout):
    """
    Effect implementation of ``acquire`` method.

    :return: ``Effect`` of ``bool``
    """
    try:
        # Before acquiring, let's delete any child node which may be
        # lingering from a previous acquire. Ideally this should happen only
        # when acquire is called again before release. It shouldn't happen
        # if this is called after release or after is_acquired returns
        # False. In any case, it's the safest thing to do
        yield self.release_eff()
        try:
            yield Effect(CreateNode(self.path))
        except NodeExistsError:
            pass
        prefix = yield Effect(Func(uuid.uuid4))
        # TODO: https://github.com/rackerlabs/otter/issues/1926
        create_intent = CreateNode(
            "{}/{}".format(self.path, prefix), value=self.identifier,
            ephemeral=True, sequence=True
        )
        self._node = yield Effect(create_intent)
        acquired = yield self._acquire_loop(blocking, timeout)
        if not acquired:
            yield self.release_eff()
        yield do_return(acquired)
    except Exception as e:
        yield self.release_eff()
        raise e
def convergence_exec_data(tenant_id, group_id, now, get_executor):
    """
    Get data required while executing convergence
    """
    sg_eff = Effect(GetScalingGroupInfo(tenant_id=tenant_id,
                                        group_id=group_id))
    (scaling_group, manifest) = yield sg_eff

    group_state = manifest['state']
    launch_config = manifest['launchConfiguration']

    executor = get_executor(launch_config)

    resources = yield executor.gather(tenant_id, group_id, now)

    if group_state.status == ScalingGroupStatus.DELETING:
        desired_capacity = 0
    else:
        desired_capacity = group_state.desired
        # See [Convergence servers cache] comment on top of the file.
        yield executor.update_cache(scaling_group, now, **resources)

    desired_group_state = executor.get_desired_group_state(
        group_id, launch_config, desired_capacity)

    yield do_return((executor, scaling_group, group_state,
                     desired_group_state, resources))
def convergence_failed(tenant_id, group_id, reasons, timedout=False):
    """
    Handle convergence failure

    :param str tenant_id: Tenant ID
    :param str group_id: Group ID
    :param reasons: List of :obj:`ErrorReason` objects
    :param bool timedout: Has convergence failed due to reason timing out?

    :return: convergence execution status
    :rtype: :obj:`ConvergenceIterationStatus`
    """
    yield Effect(LoadAndUpdateGroupStatus(tenant_id, group_id,
                                          ScalingGroupStatus.ERROR))
    presented_reasons = sorted(present_reasons(reasons))
    if len(presented_reasons) == 0:
        presented_reasons = [u"Unknown error occurred"]
    elif timedout:
        presented_reasons = ["Timed out: {}".format(reason)
                             for reason in presented_reasons]
    yield cf_err(
        'group-status-error', status=ScalingGroupStatus.ERROR.name,
        reasons=presented_reasons)
    yield Effect(UpdateGroupErrorReasons(tenant_id, group_id,
                                         presented_reasons))
    yield do_return(ConvergenceIterationStatus.Stop())
def _execute_steps(steps):
    """
    Given a set of steps, executes them, logs the result, and returns the
    worst priority with a list of reasons for that result.

    :return: a tuple of (:class:`StepResult` constant,
        list of :obj:`ErrorReason`)
    """
    if len(steps) > 0:
        results = yield steps_to_effect(steps)
        severity = [StepResult.FAILURE, StepResult.RETRY,
                    StepResult.LIMITED_RETRY, StepResult.SUCCESS]
        priority = sorted(results,
                          key=lambda (status, reasons): severity.index(status))
        worst_status = priority[0][0]
        results_to_log = [
            {'step': step,
             'result': result,
             'reasons': map(structure_reason, reasons)}
            for step, (result, reasons) in zip(steps, results)
        ]
        reasons = reduce(operator.add,
                         (x[1] for x in results if x[0] == worst_status))
    else:
        worst_status = StepResult.SUCCESS
        results_to_log = reasons = []

    yield msg('execute-convergence-results',
              results=results_to_log,
              worst_status=worst_status.name)
    yield do_return((worst_status, reasons))
def acquire_eff(self, blocking, timeout):
    """
    Effect implementation of ``acquire`` method.

    :return: ``Effect`` of ``bool``
    """
    try:
        # Before acquiring, let's delete any child node which may be
        # lingering from a previous acquire. Ideally this should happen only
        # when acquire is called again before release. It shouldn't happen
        # if this is called after release or after is_acquired returns
        # False. In any case, it's the safest thing to do
        yield self.release_eff()
        try:
            yield Effect(CreateNode(self.path))
        except NodeExistsError:
            pass
        prefix = yield Effect(Func(uuid.uuid4))
        # TODO: https://github.com/rackerlabs/otter/issues/1926
        create_intent = CreateNode("{}/{}".format(self.path, prefix),
                                   value=self.identifier,
                                   ephemeral=True, sequence=True)
        self._node = yield Effect(create_intent)
        acquired = yield self._acquire_loop(blocking, timeout)
        if not acquired:
            yield self.release_eff()
        yield do_return(acquired)
    except Exception as e:
        yield self.release_eff()
        raise e
def get_clb_contents():
    """
    Get Rackspace Cloud Load Balancer contents as list of `CLBNode`. CLB
    health monitor information is also returned as a pmap of :obj:`CLB`
    objects mapped on LB ID.

    :return: Effect of (``list`` of :obj:`CLBNode`, `pmap` of :obj:`CLB`)
    :rtype: :obj:`Effect`
    """
    # If we get a CLBNotFoundError while fetching feeds, we should throw away
    # all nodes related to that load balancer, because we don't want to act
    # on data that we know is invalid/outdated (for example, if we can't
    # fetch a feed because CLB was deleted, we don't want to say that we have
    # a node in DRAINING with draining time of 0; we should just say that the
    # node is gone).
    def gone(r):
        return catch(CLBNotFoundError, lambda exc: r)

    lb_ids = [lb['id'] for lb in (yield _retry(get_clbs()))]
    node_reqs = [_retry(get_clb_nodes(lb_id).on(error=gone([])))
                 for lb_id in lb_ids]
    healthmon_reqs = [
        _retry(get_clb_health_monitor(lb_id).on(error=gone(None)))
        for lb_id in lb_ids]
    all_nodes_hms = yield parallel(node_reqs + healthmon_reqs)
    all_nodes, hms = all_nodes_hms[:len(lb_ids)], all_nodes_hms[len(lb_ids):]
    lb_nodes = {
        lb_id: [CLBNode.from_node_json(lb_id, node) for node in nodes]
        for lb_id, nodes in zip(lb_ids, all_nodes)}
    clbs = {
        str(lb_id): CLB(bool(health_mon))
        for lb_id, health_mon in zip(lb_ids, hms) if health_mon is not None}
    draining = [n for n in concat(lb_nodes.values())
                if n.description.condition == CLBNodeCondition.DRAINING]
    feeds = yield parallel(
        [_retry(get_clb_node_feed(n.description.lb_id, n.node_id).on(
            error=gone(None)))
         for n in draining]
    )
    nodes_to_feeds = dict(zip(draining, feeds))
    deleted_lbs = set([
        node.description.lb_id
        for (node, feed) in nodes_to_feeds.items() if feed is None])

    def update_drained_at(node):
        feed = nodes_to_feeds.get(node)
        if node.description.lb_id in deleted_lbs:
            return None
        if feed is not None:
            node.drained_at = extract_clb_drained_at(feed)
        return node

    nodes = map(update_drained_at, concat(lb_nodes.values()))
    yield do_return((
        list(filter(bool, nodes)),
        pmap(keyfilter(lambda k: k not in deleted_lbs, clbs))))
def convergence_succeeded(executor, scaling_group, group_state, resources,
                          now):
    """
    Handle convergence success
    """
    if group_state.status == ScalingGroupStatus.DELETING:
        # servers have been deleted. Delete the group for real
        yield Effect(DeleteGroup(tenant_id=scaling_group.tenant_id,
                                 group_id=scaling_group.uuid))
        yield do_return(ConvergenceIterationStatus.GroupDeleted())
    elif group_state.status == ScalingGroupStatus.ERROR:
        yield Effect(UpdateGroupStatus(scaling_group=scaling_group,
                                       status=ScalingGroupStatus.ACTIVE))
        yield cf_msg('group-status-active',
                     status=ScalingGroupStatus.ACTIVE.name)
    # update servers cache with latest servers
    yield executor.update_cache(scaling_group, now, include_deleted=False,
                                **resources)
    yield do_return(ConvergenceIterationStatus.Stop())
def convergence_succeeded(executor, scaling_group, group_state, resources):
    """
    Handle convergence success
    """
    if group_state.status == ScalingGroupStatus.DELETING:
        # servers have been deleted. Delete the group for real
        yield Effect(DeleteGroup(tenant_id=scaling_group.tenant_id,
                                 group_id=scaling_group.uuid))
        yield do_return(ConvergenceIterationStatus.GroupDeleted())
    elif group_state.status == ScalingGroupStatus.ERROR:
        yield Effect(UpdateGroupStatus(scaling_group=scaling_group,
                                       status=ScalingGroupStatus.ACTIVE))
        yield cf_msg('group-status-active',
                     status=ScalingGroupStatus.ACTIVE.name)
    # update servers cache with latest servers.
    # See [Convergence servers cache] comment on top of the file.
    now = yield Effect(Func(datetime.utcnow))
    yield executor.update_cache(scaling_group, now, include_deleted=False,
                                **resources)
    yield do_return(ConvergenceIterationStatus.Stop())
def group_steps(group):
    """
    Return Effect of list of steps that would be performed on the group
    if convergence is triggered on it with desired=actual. Also returns
    current delta of desired and actual
    """
    now_dt = yield Effect(Func(datetime.utcnow))
    all_data_eff = convergence_exec_data(
        group["tenantId"], group["groupId"], now_dt, get_executor)
    try:
        all_data = yield Effect(TenantScope(all_data_eff, group["tenantId"]))
    except Exception as e:
        yield do_return((e, 0))
    (executor, scaling_group, group_state, desired_group_state,
     resources) = all_data
    delta = desired_group_state.capacity - len(resources['servers'])
    desired_group_state.capacity = len(resources['servers'])
    steps = executor.plan(desired_group_state, datetime_to_epoch(now_dt),
                          3600, {}, **resources)
    yield do_return((steps, delta))
def perform_get_children_with_stats(kz_client, dispatcher, intent):
    """
    Perform :obj:`GetChildrenWithStats`. Must be partialed with
    ``kz_client``.

    :param kz_client: txKazoo client
    :param dispatcher: dispatcher, supplied by perform
    :param GetChildrenWithStats intent: the intent
    """
    path = intent.path
    children = yield Effect(GetChildren(path))
    stats = yield parallel(Effect(GetStat(path + "/" + p)) for p in children)
    yield do_return([c_and_s for c_and_s in zip(children, stats)
                     if c_and_s[1] is not None])
def get_recently_converged_groups(recently_converged, interval):
    """
    Return a list of recently converged groups, and garbage-collect any
    groups in the recently_converged map that are no longer 'recent'.
    """
    # STM would be cool but this is synchronous so whatever
    recent = yield recently_converged.read()
    now = yield Effect(Func(time.time))
    to_remove = [group for group in recent if now - recent[group] > interval]
    cleaned = reduce(lambda m, g: m.remove(g), to_remove, recent)
    if recent != cleaned:
        yield recently_converged.modify(lambda _: cleaned)
    yield do_return(cleaned.keys())
def get_todays_scaling_groups(convergence_tids, fname):
    """
    Get scaling groups from tenants that are enabled up to today
    """
    groups = yield Effect(GetAllGroups())
    non_conv_tenants = set(groups.keys()) - set(convergence_tids)
    last_tenants_len, last_date = yield get_last_info(fname)
    now = yield Effect(Func(datetime.utcnow))
    tenants, last_tenants_len, last_date = get_todays_tenants(
        non_conv_tenants, now, last_tenants_len, last_date)
    yield update_last_info(fname, last_tenants_len, last_date)
    yield do_return(
        keyfilter(lambda t: t in set(tenants + convergence_tids), groups))
def perform_get_children_with_stats(kz_client, dispatcher, intent):
    """
    Perform :obj:`GetChildrenWithStats`. Must be partialed with
    ``kz_client``.

    :param kz_client: txKazoo client
    :param dispatcher: dispatcher, supplied by perform
    :param GetChildrenWithStats intent: the intent
    """
    path = intent.path
    children = yield Effect(GetChildren(path))
    stats = yield parallel(Effect(GetStat(path + '/' + p)) for p in children)
    yield do_return([
        c_and_s for c_and_s in zip(children, stats)
        if c_and_s[1] is not None
    ])
def read_entries(service_type, url, params, direction, follow_limit=100,
                 log_msg_type=None):
    """
    Read all feed entries and follow in given direction until it is empty

    :param service_type: Service hosting the feed
    :type service_type: A member of :class:`ServiceType`
    :param str url: CF URL to append
    :param dict params: HTTP parameters
    :param direction: Where to continue fetching?
    :type direction: A member of :class:`Direction`
    :param int follow_limit: Maximum number of times to follow in given
        direction

    :return: (``list`` of :obj:`Element`, last fetched params) tuple
    """
    if direction == Direction.PREVIOUS:
        direction_link = atom.previous_link
    elif direction == Direction.NEXT:
        direction_link = atom.next_link
    else:
        raise ValueError("Invalid direction")

    if log_msg_type is not None:
        log_cb = log_success_response(log_msg_type, identity, False)
    else:
        log_cb = identity

    all_entries = []
    while follow_limit > 0:
        resp, feed_str = yield service_request(service_type, "GET", url,
                                               params=params,
                                               json_response=False).on(log_cb)
        feed = atom.parse(feed_str)
        entries = atom.entries(feed)
        if entries == []:
            break
        all_entries.extend(entries)
        link = direction_link(feed)
        if link is None:
            break
        params = parse_qs(urlparse(link).query)
        follow_limit -= 1
    yield do_return((all_entries, params))
def convergence_failed(scaling_group, reasons):
    """
    Handle convergence failure
    """
    yield Effect(UpdateGroupStatus(scaling_group=scaling_group,
                                   status=ScalingGroupStatus.ERROR))
    presented_reasons = sorted(present_reasons(reasons))
    if len(presented_reasons) == 0:
        presented_reasons = [u"Unknown error occurred"]
    yield cf_err(
        'group-status-error', status=ScalingGroupStatus.ERROR.name,
        reasons=presented_reasons)
    yield Effect(UpdateGroupErrorReasons(scaling_group, presented_reasons))
    yield do_return(ConvergenceIterationStatus.Stop())
def get_clb_contents():
    """Get Rackspace Cloud Load Balancer contents as list of `CLBNode`."""
    # If we get a CLBNotFoundError while fetching feeds, we should throw away
    # all nodes related to that load balancer, because we don't want to act
    # on data that we know is invalid/outdated (for example, if we can't
    # fetch a feed because CLB was deleted, we don't want to say that we have
    # a node in DRAINING with draining time of 0; we should just say that the
    # node is gone).
    def gone(r):
        return catch(CLBNotFoundError, lambda exc: r)

    lb_ids = [lb['id'] for lb in (yield _retry(get_clbs()))]
    node_reqs = [
        _retry(get_clb_nodes(lb_id).on(error=gone([])))
        for lb_id in lb_ids
    ]
    all_nodes = yield parallel(node_reqs)
    lb_nodes = {
        lb_id: [CLBNode.from_node_json(lb_id, node) for node in nodes]
        for lb_id, nodes in zip(lb_ids, all_nodes)
    }
    draining = [
        n for n in concat(lb_nodes.values())
        if n.description.condition == CLBNodeCondition.DRAINING
    ]
    feeds = yield parallel([
        _retry(
            get_clb_node_feed(n.description.lb_id,
                              n.node_id).on(error=gone(None)))
        for n in draining
    ])
    nodes_to_feeds = dict(zip(draining, feeds))
    deleted_lbs = set([
        node.description.lb_id
        for (node, feed) in nodes_to_feeds.items() if feed is None
    ])

    def update_drained_at(node):
        feed = nodes_to_feeds.get(node)
        if node.description.lb_id in deleted_lbs:
            return None
        if feed is not None:
            return assoc_obj(node, drained_at=extract_CLB_drained_at(feed))
        else:
            return node

    nodes = map(update_drained_at, concat(lb_nodes.values()))
    yield do_return(list(filter(bool, nodes)))
def call_if_acquired(lock, eff):
    """
    Call ``eff`` if ``lock`` is acquired. If not, try to acquire the lock
    and call ``eff``. This function is different from
    :func:`otter.util.deferredutils.with_lock` in that it does not release
    the lock after calling ``eff``. Also it expects that the lock may
    already be acquired.

    :param lock: Lock object from :obj:`TxKazooClient`
    :param eff: ``Effect`` to call if lock is/was acquired

    :return: (eff return, lock acquired bool) tuple. The first element may
        be ``NOT_CALLED`` if eff was not called
    :rtype: ``Effect`` of ``tuple``
    """
    if (yield lock.is_acquired_eff()):
        ret = yield eff
        yield do_return((ret, False))
    else:
        if (yield lock.acquire_eff(False, None)):
            ret = yield eff
            yield do_return((ret, True))
        else:
            yield do_return((NOT_CALLED, False))
def group_steps(group):
    """
    Return Effect of list of steps that would be performed on the group
    if convergence is triggered on it with desired=actual
    """
    now_dt = yield Effect(Func(datetime.utcnow))
    all_data_eff = convergence_exec_data(group["tenantId"], group["groupId"],
                                         now_dt, get_executor)
    all_data = yield Effect(TenantScope(all_data_eff, group["tenantId"]))
    (executor, scaling_group, group_state, desired_group_state,
     resources) = all_data
    desired_group_state.desired = len(resources['servers'])
    steps = executor.plan(desired_group_state, datetime_to_epoch(now_dt),
                          3600, **resources)
    yield do_return(steps)
def group_steps(group):
    """
    Return Effect of list of steps that would be performed on the group
    if convergence is triggered on it with desired=actual
    """
    now_dt = yield Effect(Func(datetime.utcnow))
    all_data_eff = convergence_exec_data(
        group["tenantId"], group["groupId"], now_dt, get_executor)
    all_data = yield Effect(TenantScope(all_data_eff, group["tenantId"]))
    (executor, scaling_group, group_state, desired_group_state,
     resources) = all_data
    desired_group_state.desired = len(resources['servers'])
    steps = executor.plan(desired_group_state, datetime_to_epoch(now_dt),
                          3600, **resources)
    yield do_return(steps)
def converge(tenant_id, group_id, dirty_flag):
    stat = yield Effect(GetStat(dirty_flag))
    # If the node disappeared, ignore it. `stat` will be None here if the
    # divergent flag was discovered only after the group is removed from
    # currently_converging, but before the divergent flag is deleted, and
    # then the deletion happens, and then our GetStat happens. This
    # basically means it happens when one convergence is starting as
    # another one for the same group is ending.
    if stat is None:
        yield msg('converge-divergent-flag-disappeared', znode=dirty_flag)
    else:
        eff = converge_one_group(currently_converging, recently_converged,
                                 waiting, tenant_id, group_id, stat.version,
                                 build_timeout, limited_retry_iterations,
                                 step_limits)
        result = yield Effect(TenantScope(eff, tenant_id))
        yield do_return(result)
def read_entries(service_type, url, params, direction, follow_limit=100,
                 log_msg_type=None):
    """
    Read all feed entries and follow in given direction until it is empty

    :param service_type: Service hosting the feed
    :type service_type: A member of :class:`ServiceType`
    :param str url: CF URL to append
    :param dict params: HTTP parameters
    :param direction: Where to continue fetching?
    :type direction: A member of :class:`Direction`
    :param int follow_limit: Maximum number of times to follow in given
        direction

    :return: (``list`` of :obj:`Element`, last fetched params) tuple
    """
    if direction == Direction.PREVIOUS:
        direction_link = atom.previous_link
    elif direction == Direction.NEXT:
        direction_link = atom.next_link
    else:
        raise ValueError("Invalid direction")

    if log_msg_type is not None:
        log_cb = log_success_response(log_msg_type, identity, False)
    else:
        log_cb = identity

    all_entries = []
    while follow_limit > 0:
        resp, feed_str = yield service_request(
            service_type, "GET", url, params=params,
            json_response=False).on(log_cb)
        feed = atom.parse(feed_str)
        entries = atom.entries(feed)
        if entries == []:
            break
        all_entries.extend(entries)
        link = direction_link(feed)
        if link is None:
            break
        params = parse_qs(urlparse(link).query)
        follow_limit -= 1
    yield do_return((all_entries, params))
def convergence_failed(scaling_group, reasons, timedout=False):
    """
    Handle convergence failure
    """
    yield Effect(
        UpdateGroupStatus(scaling_group=scaling_group,
                          status=ScalingGroupStatus.ERROR))
    presented_reasons = sorted(present_reasons(reasons))
    if len(presented_reasons) == 0:
        presented_reasons = [u"Unknown error occurred"]
    elif timedout:
        presented_reasons = [
            "Timed out: {}".format(reason) for reason in presented_reasons
        ]
    yield cf_err('group-status-error',
                 status=ScalingGroupStatus.ERROR.name,
                 reasons=presented_reasons)
    yield Effect(UpdateGroupErrorReasons(scaling_group, presented_reasons))
    yield do_return(ConvergenceIterationStatus.Stop())
def non_concurrently(locks, key, eff):
    """
    Run some Effect non-concurrently.

    :param Reference locks: A reference to a PSet that will be used to
        record which operations are currently being executed.
    :param key: the key to use for this particular operation, which will be
        stored in ``locks``
    :param Effect eff: the effect to execute.

    :return: Effect with the result of ``eff``, or an error of
        :obj:`ConcurrentError` if the given key already has an associated
        effect being performed.
    """
    if key in (yield locks.read()):
        raise ConcurrentError(key)
    yield locks.modify(lambda cc: cc.add(key))
    try:
        result = yield eff
    finally:
        yield locks.modify(lambda cc: cc.remove(key))
    yield do_return(result)
def get_clb_contents():
    """Get Rackspace Cloud Load Balancer contents as list of `CLBNode`."""
    # If we get a CLBNotFoundError while fetching feeds, we should throw away
    # all nodes related to that load balancer, because we don't want to act
    # on data that we know is invalid/outdated (for example, if we can't
    # fetch a feed because CLB was deleted, we don't want to say that we have
    # a node in DRAINING with draining time of 0; we should just say that the
    # node is gone).
    def gone(r):
        return catch(CLBNotFoundError, lambda exc: r)

    lb_ids = [lb['id'] for lb in (yield _retry(get_clbs()))]
    node_reqs = [_retry(get_clb_nodes(lb_id).on(error=gone([])))
                 for lb_id in lb_ids]
    all_nodes = yield parallel(node_reqs)
    lb_nodes = {lb_id: [CLBNode.from_node_json(lb_id, node) for node in nodes]
                for lb_id, nodes in zip(lb_ids, all_nodes)}
    draining = [n for n in concat(lb_nodes.values())
                if n.description.condition == CLBNodeCondition.DRAINING]
    feeds = yield parallel(
        [_retry(get_clb_node_feed(n.description.lb_id, n.node_id).on(
            error=gone(None)))
         for n in draining]
    )
    nodes_to_feeds = dict(zip(draining, feeds))
    deleted_lbs = set([
        node.description.lb_id
        for (node, feed) in nodes_to_feeds.items() if feed is None])

    def update_drained_at(node):
        feed = nodes_to_feeds.get(node)
        if node.description.lb_id in deleted_lbs:
            return None
        if feed is not None:
            return assoc_obj(node, drained_at=extract_CLB_drained_at(feed))
        else:
            return node

    nodes = map(update_drained_at, concat(lb_nodes.values()))
    yield do_return(list(filter(bool, nodes)))
def get_clb_node_feed(lb_id, node_id):
    """
    Get the atom feed associated with a CLB node.

    :param int lb_id: Cloud Load balancer ID
    :param int node_id: Node ID of the load balancer node

    :returns: Effect of ``list`` of atom entry :class:`Element`
    :rtype: ``Effect``
    """
    all_entries = []
    params = {}
    while True:
        feed_str = yield _node_feed_page(lb_id, node_id, params)
        feed = atom.parse(feed_str)
        entries = atom.entries(feed)
        if entries == []:
            break
        all_entries.extend(entries)
        next_link = atom.next_link(feed)
        if not next_link:
            break
        params = parse_qs(urlparse(next_link).query)
    yield do_return(all_entries)
def execute_convergence(tenant_id, group_id, build_timeout, waiting,
                        limited_retry_iterations, step_limits,
                        get_executor=get_executor):
    """
    Gather data, plan a convergence, save active and pending servers to the
    group state, and then execute the convergence.

    :param str tenant_id: the tenant ID for the group to converge
    :param str group_id: the ID of the group to be converged
    :param number build_timeout: number of seconds to wait for servers to be
        in building before it is timed out and deleted
    :param Reference waiting: pmap of waiting groups
    :param int limited_retry_iterations: number of iterations to wait for
        LIMITED_RETRY steps
    :param dict step_limits: Mapping of step class to number of executions
        allowed in a convergence cycle
    :param callable get_executor: like :func:`get_executor`, used for testing.

    :return: Effect of :obj:`ConvergenceIterationStatus`.
    :raise: :obj:`NoSuchScalingGroupError` if the group doesn't exist.
    """
    clean_waiting = _clean_waiting(waiting, group_id)

    # Begin convergence by updating group status to ACTIVE
    yield msg("begin-convergence")
    try:
        yield Effect(LoadAndUpdateGroupStatus(tenant_id, group_id,
                                              ScalingGroupStatus.ACTIVE))
    except NoSuchScalingGroupError:
        # Expected for DELETING group. Ignore.
        pass

    # Gather data
    now_dt = yield Effect(Func(datetime.utcnow))
    try:
        all_data = yield msg_with_time(
            "gather-convergence-data",
            convergence_exec_data(tenant_id, group_id, now_dt,
                                  get_executor=get_executor))
        (executor, scaling_group, group_state, desired_group_state,
         resources) = all_data
    except FirstError as fe:
        if fe.exc_info[0] is NoSuchEndpoint:
            result = yield convergence_failed(
                tenant_id, group_id, [ErrorReason.Exception(fe.exc_info)])
            yield do_return(result)
        raise fe

    # prepare plan
    steps = executor.plan(desired_group_state, datetime_to_epoch(now_dt),
                          build_timeout, step_limits, **resources)
    yield log_steps(steps)

    # Execute plan
    yield msg('execute-convergence', steps=steps, now=now_dt,
              desired=desired_group_state, **resources)
    worst_status, reasons = yield _execute_steps(steps)

    if worst_status != StepResult.LIMITED_RETRY:
        # If we're not waiting any more, there's no point in keeping track of
        # the group
        yield clean_waiting

    # Handle the status from execution
    if worst_status == StepResult.SUCCESS:
        result = yield convergence_succeeded(
            executor, scaling_group, group_state, resources)
    elif worst_status == StepResult.FAILURE:
        result = yield convergence_failed(tenant_id, group_id, reasons)
    elif worst_status is StepResult.LIMITED_RETRY:
        # We allow further iterations to proceed as long as we haven't been
        # waiting for a LIMITED_RETRY for N consecutive iterations.
        current_iterations = (yield waiting.read()).get(group_id, 0)
        if current_iterations > limited_retry_iterations:
            yield msg('converge-limited-retry-too-long')
            yield clean_waiting
            # Prefix "Timed out" to all limited retry reasons
            result = yield convergence_failed(tenant_id, group_id, reasons,
                                              True)
        else:
            yield waiting.modify(
                lambda group_iterations:
                    group_iterations.set(group_id, current_iterations + 1))
            result = ConvergenceIterationStatus.Continue()
    else:
        result = ConvergenceIterationStatus.Continue()
    yield do_return(result)
def challenge():
    line = None
    while line != 'To seek the Holy Grail.\n':
        yield Effect(Print('What... is your quest?'))
        line = yield Effect(Readline())
    yield do_return(line)
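# A minimal sketch of how a do-style generator like ``challenge`` above could
# be run with the ``effect`` library. The ``Print`` and ``Readline`` intent
# classes and their performers below are assumptions made for illustration;
# only ``TypeDispatcher``, ``ComposedDispatcher``, ``base_dispatcher``,
# ``sync_performer`` and ``sync_perform`` come from ``effect`` itself.
from effect import (ComposedDispatcher, TypeDispatcher, base_dispatcher,
                    sync_perform, sync_performer)


class Print(object):
    """Intent that prints a message to stdout."""
    def __init__(self, message):
        self.message = message


class Readline(object):
    """Intent that reads a line from stdin."""


@sync_performer
def perform_print(dispatcher, intent):
    print(intent.message)


@sync_performer
def perform_readline(dispatcher, intent):
    # keep the trailing newline, since ``challenge`` compares against
    # 'To seek the Holy Grail.\n'
    return raw_input() + "\n"


dispatcher = ComposedDispatcher([
    TypeDispatcher({Print: perform_print, Readline: perform_readline}),
    base_dispatcher])

# Assuming ``challenge`` is wrapped with ``@do``, calling it returns an
# Effect that can then be performed synchronously:
# sync_perform(dispatcher, challenge())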
def increment(request):
    num = int(request.form['number'])
    yield Effect(Func(increment_counter, num))
    yield do_return(redirect(url_for('root')))
def converge_all_groups(currently_converging, recently_converged, waiting,
                        my_buckets, all_buckets, divergent_flags,
                        build_timeout, interval, limited_retry_iterations,
                        step_limits,
                        converge_one_group=converge_one_group):
    """
    Check for groups that need convergence and which match up to the buckets
    we've been allocated.

    :param Reference currently_converging: pset of currently converging
        groups
    :param Reference recently_converged: pmap of group ID to time last
        convergence finished
    :param Reference waiting: pmap of group ID to number of iterations
        already waited
    :param my_buckets: The buckets that should be checked for group IDs to
        converge on.
    :param all_buckets: The set of all buckets that can be checked for group
        IDs to converge on.  ``my_buckets`` should be a subset of this.
    :param divergent_flags: divergent flags that were found in zookeeper.
    :param number build_timeout: number of seconds to wait for servers to be
        in building before it is timed out and deleted
    :param number interval: number of seconds between attempts at
        convergence.  Groups will not be converged if less than this amount
        of time has passed since the end of their last convergence.
    :param int limited_retry_iterations: number of iterations to wait for
        LIMITED_RETRY steps
    :param dict step_limits: Mapping of step class to number of executions
        allowed in a convergence cycle
    :param callable converge_one_group: function to use to converge a single
        group - to be used for test injection only
    """
    group_infos = get_my_divergent_groups(my_buckets, all_buckets,
                                          divergent_flags)
    # filter out currently converging groups
    cc = yield currently_converging.read()
    group_infos = [info for info in group_infos if info['group_id'] not in cc]
    if not group_infos:
        return
    yield msg('converge-all-groups', group_infos=group_infos,
              currently_converging=list(cc))

    @do
    def converge(tenant_id, group_id, dirty_flag):
        stat = yield Effect(GetStat(dirty_flag))
        # If the node disappeared, ignore it. `stat` will be None here if the
        # divergent flag was discovered only after the group is removed from
        # currently_converging, but before the divergent flag is deleted, and
        # then the deletion happens, and then our GetStat happens. This
        # basically means it happens when one convergence is starting as
        # another one for the same group is ending.
        if stat is None:
            yield msg('converge-divergent-flag-disappeared', znode=dirty_flag)
        else:
            eff = converge_one_group(currently_converging, recently_converged,
                                     waiting, tenant_id, group_id,
                                     stat.version, build_timeout,
                                     limited_retry_iterations, step_limits)
            result = yield Effect(TenantScope(eff, tenant_id))
            yield do_return(result)

    recent_groups = yield get_recently_converged_groups(
        recently_converged, interval)
    effs = []
    for info in group_infos:
        tenant_id, group_id = info['tenant_id'], info['group_id']
        if group_id in recent_groups:
            # Don't converge a group if it has recently been converged.
            continue
        eff = converge(tenant_id, group_id, info['dirty-flag'])
        effs.append(
            with_log(eff, tenant_id=tenant_id, scaling_group_id=group_id))
    yield do_return(parallel(effs))
def root(request):
    counter = yield Effect(Func(get_counter))
    yield do_return(render_template('counter.html', counter=counter))
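# A minimal sketch of performing the Flask-style handlers above
# (``root``/``increment``/``handle_take``-style functions). ``Func`` intents
# are handled by effect's built-in ``base_dispatcher``, so no custom
# performer is needed for them; ``get_counter`` and the ``@do``-decorated
# handlers are assumed to be defined as in the surrounding snippets, and
# ``perform_request`` is a hypothetical helper name.
from effect import base_dispatcher, sync_perform


def perform_request(handler, request):
    """Run a handler's Effect synchronously and return its result."""
    return sync_perform(base_dispatcher, handler(request))

# e.g. response = perform_request(root, request)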
def execute_convergence(tenant_id, group_id, build_timeout, waiting,
                        limited_retry_iterations, step_limits,
                        get_executor=get_executor):
    """
    Gather data, plan a convergence, save active and pending servers to the
    group state, and then execute the convergence.

    :param str tenant_id: the tenant ID for the group to converge
    :param str group_id: the ID of the group to be converged
    :param number build_timeout: number of seconds to wait for servers to be
        in building before it is timed out and deleted
    :param Reference waiting: pmap of waiting groups
    :param int limited_retry_iterations: number of iterations to wait for
        LIMITED_RETRY steps
    :param dict step_limits: Mapping of step class to number of executions
        allowed in a convergence cycle
    :param callable get_executor: like :func:`get_executor`, used for testing.

    :return: Effect of :obj:`ConvergenceIterationStatus`.
    :raise: :obj:`NoSuchScalingGroupError` if the group doesn't exist.
    """
    clean_waiting = _clean_waiting(waiting, group_id)

    # Gather data
    yield msg("begin-convergence")
    now_dt = yield Effect(Func(datetime.utcnow))
    all_data = yield msg_with_time(
        "gather-convergence-data",
        convergence_exec_data(tenant_id, group_id, now_dt,
                              get_executor=get_executor))
    (executor, scaling_group, group_state, desired_group_state,
     resources) = all_data

    # prepare plan
    steps = executor.plan(desired_group_state, datetime_to_epoch(now_dt),
                          build_timeout, step_limits, **resources)
    yield log_steps(steps)

    # Execute plan
    yield msg('execute-convergence', steps=steps, now=now_dt,
              desired=desired_group_state, **resources)
    worst_status, reasons = yield _execute_steps(steps)

    if worst_status != StepResult.LIMITED_RETRY:
        # If we're not waiting any more, there's no point in keeping track of
        # the group
        yield clean_waiting

    # Handle the status from execution
    if worst_status == StepResult.SUCCESS:
        result = yield convergence_succeeded(executor, scaling_group,
                                             group_state, resources, now_dt)
    elif worst_status == StepResult.FAILURE:
        result = yield convergence_failed(scaling_group, reasons)
    elif worst_status is StepResult.LIMITED_RETRY:
        # We allow further iterations to proceed as long as we haven't been
        # waiting for a LIMITED_RETRY for N consecutive iterations.
        current_iterations = (yield waiting.read()).get(group_id, 0)
        if current_iterations > limited_retry_iterations:
            yield msg('converge-limited-retry-too-long')
            yield clean_waiting
            # Prefix "Timed out" to all limited retry reasons
            result = yield convergence_failed(scaling_group, reasons, True)
        else:
            yield waiting.modify(lambda group_iterations: group_iterations.set(
                group_id, current_iterations + 1))
            result = ConvergenceIterationStatus.Continue()
    else:
        result = ConvergenceIterationStatus.Continue()
    yield do_return(result)
def execute_convergence(tenant_id, group_id, build_timeout, waiting,
                        limited_retry_iterations,
                        get_executor=get_executor):
    """
    Gather data, plan a convergence, save active and pending servers to the
    group state, and then execute the convergence.

    :param str tenant_id: the tenant ID for the group to converge
    :param str group_id: the ID of the group to be converged
    :param number build_timeout: number of seconds to wait for servers to be
        in building before it is timed out and deleted
    :param Reference waiting: pmap of waiting groups
    :param int limited_retry_iterations: number of iterations to wait for
        LIMITED_RETRY steps
    :param callable get_executor: like :func:`get_executor`, to be used for
        test injection only

    :return: Effect of :obj:`ConvergenceIterationStatus`.
    :raise: :obj:`NoSuchScalingGroupError` if the group doesn't exist.
    """
    clean_waiting = _clean_waiting(waiting, group_id)

    # Gather data
    yield msg("begin-convergence")
    now_dt = yield Effect(Func(datetime.utcnow))
    all_data = yield msg_with_time(
        "gather-convergence-data",
        convergence_exec_data(tenant_id, group_id, now_dt,
                              get_executor=get_executor))
    (executor, scaling_group, group_state, desired_group_state,
     resources) = all_data

    # prepare plan
    steps = executor.plan(desired_group_state, datetime_to_epoch(now_dt),
                          build_timeout, **resources)
    yield log_steps(steps)

    # Execute plan
    yield msg('execute-convergence', steps=steps, now=now_dt,
              desired=desired_group_state, **resources)
    worst_status, reasons = yield _execute_steps(steps)

    if worst_status != StepResult.LIMITED_RETRY:
        # If we're not waiting any more, there's no point in keeping track of
        # the group
        yield clean_waiting

    # Handle the status from execution
    if worst_status == StepResult.SUCCESS:
        result = yield convergence_succeeded(
            executor, scaling_group, group_state, resources, now_dt)
    elif worst_status == StepResult.FAILURE:
        result = yield convergence_failed(scaling_group, reasons)
    elif worst_status is StepResult.LIMITED_RETRY:
        # We allow further iterations to proceed as long as we haven't been
        # waiting for a LIMITED_RETRY for N consecutive iterations.
        current_iterations = (yield waiting.read()).get(group_id, 0)
        if current_iterations > limited_retry_iterations:
            yield msg('converge-limited-retry-too-long')
            yield clean_waiting
            result = yield convergence_failed(scaling_group, reasons)
        else:
            yield waiting.modify(
                lambda group_iterations:
                    group_iterations.set(group_id, current_iterations + 1))
            result = ConvergenceIterationStatus.Continue()
    else:
        result = ConvergenceIterationStatus.Continue()
    yield do_return(result)
def get_clb_contents():
    """
    Get Rackspace Cloud Load Balancer contents as list of `CLBNode`. CLB
    health monitor information is also returned as a pmap of :obj:`CLB`
    objects mapped on LB ID.

    :return: Effect of (``list`` of :obj:`CLBNode`, `pmap` of :obj:`CLB`)
    :rtype: :obj:`Effect`
    """
    # If we get a CLBNotFoundError while fetching feeds, we should throw away
    # all nodes related to that load balancer, because we don't want to act
    # on data that we know is invalid/outdated (for example, if we can't
    # fetch a feed because CLB was deleted, we don't want to say that we have
    # a node in DRAINING with draining time of 0; we should just say that the
    # node is gone).
    def gone(r):
        return catch(CLBNotFoundError, lambda exc: r)

    lb_ids = [lb['id'] for lb in (yield _retry(get_clbs()))]
    node_reqs = [
        _retry(get_clb_nodes(lb_id).on(error=gone([])))
        for lb_id in lb_ids
    ]
    healthmon_reqs = [
        _retry(get_clb_health_monitor(lb_id).on(error=gone(None)))
        for lb_id in lb_ids
    ]
    all_nodes_hms = yield parallel(node_reqs + healthmon_reqs)
    all_nodes, hms = all_nodes_hms[:len(lb_ids)], all_nodes_hms[len(lb_ids):]
    lb_nodes = {
        lb_id: [CLBNode.from_node_json(lb_id, node) for node in nodes]
        for lb_id, nodes in zip(lb_ids, all_nodes)
    }
    clbs = {
        str(lb_id): CLB(bool(health_mon))
        for lb_id, health_mon in zip(lb_ids, hms) if health_mon is not None
    }
    draining = [
        n for n in concat(lb_nodes.values())
        if n.description.condition == CLBNodeCondition.DRAINING
    ]
    feeds = yield parallel([
        _retry(
            get_clb_node_feed(n.description.lb_id,
                              n.node_id).on(error=gone(None)))
        for n in draining
    ])
    nodes_to_feeds = dict(zip(draining, feeds))
    deleted_lbs = set([
        node.description.lb_id
        for (node, feed) in nodes_to_feeds.items() if feed is None
    ])

    def update_drained_at(node):
        feed = nodes_to_feeds.get(node)
        if node.description.lb_id in deleted_lbs:
            return None
        if feed is not None:
            node.drained_at = extract_clb_drained_at(feed)
        return node

    nodes = map(update_drained_at, concat(lb_nodes.values()))
    yield do_return((list(filter(bool, nodes)),
                     pmap(keyfilter(lambda k: k not in deleted_lbs, clbs))))
def converge_all_groups(
        currently_converging, recently_converged, waiting,
        my_buckets, all_buckets, divergent_flags, build_timeout, interval,
        limited_retry_iterations, step_limits,
        converge_one_group=converge_one_group):
    """
    Check for groups that need convergence and which match up to the buckets
    we've been allocated.

    :param Reference currently_converging: pset of currently converging
        groups
    :param Reference recently_converged: pmap of group ID to time last
        convergence finished
    :param Reference waiting: pmap of group ID to number of iterations
        already waited
    :param my_buckets: The buckets that should be checked for group IDs to
        converge on.
    :param all_buckets: The set of all buckets that can be checked for group
        IDs to converge on.  ``my_buckets`` should be a subset of this.
    :param divergent_flags: divergent flags that were found in zookeeper.
    :param number build_timeout: number of seconds to wait for servers to be
        in building before it is timed out and deleted
    :param number interval: number of seconds between attempts at
        convergence.  Groups will not be converged if less than this amount
        of time has passed since the end of their last convergence.
    :param int limited_retry_iterations: number of iterations to wait for
        LIMITED_RETRY steps
    :param dict step_limits: Mapping of step class to number of executions
        allowed in a convergence cycle
    :param callable converge_one_group: function to use to converge a single
        group - to be used for test injection only
    """
    group_infos = get_my_divergent_groups(
        my_buckets, all_buckets, divergent_flags)
    # filter out currently converging groups
    cc = yield currently_converging.read()
    group_infos = [info for info in group_infos if info['group_id'] not in cc]
    if not group_infos:
        return
    yield msg('converge-all-groups', group_infos=group_infos,
              currently_converging=list(cc))

    @do
    def converge(tenant_id, group_id, dirty_flag):
        stat = yield Effect(GetStat(dirty_flag))
        # If the node disappeared, ignore it. `stat` will be None here if the
        # divergent flag was discovered only after the group is removed from
        # currently_converging, but before the divergent flag is deleted, and
        # then the deletion happens, and then our GetStat happens. This
        # basically means it happens when one convergence is starting as
        # another one for the same group is ending.
        if stat is None:
            yield msg('converge-divergent-flag-disappeared', znode=dirty_flag)
        else:
            eff = converge_one_group(currently_converging, recently_converged,
                                     waiting, tenant_id, group_id,
                                     stat.version, build_timeout,
                                     limited_retry_iterations, step_limits)
            result = yield Effect(TenantScope(eff, tenant_id))
            yield do_return(result)

    recent_groups = yield get_recently_converged_groups(recently_converged,
                                                        interval)
    effs = []
    for info in group_infos:
        tenant_id, group_id = info['tenant_id'], info['group_id']
        if group_id in recent_groups:
            # Don't converge a group if it has recently been converged.
            continue
        eff = converge(tenant_id, group_id, info['dirty-flag'])
        effs.append(
            with_log(eff, tenant_id=tenant_id, scaling_group_id=group_id))
    yield do_return(parallel(effs))