def test_multiple_msg(self):
    """
    Multiple messages are logged when there are multiple log effects
    """
    eff = msg("yo", a="b").on(lambda _: msg("goo", d="c"))
    self.assertIsNone(sync_perform(self.disp, eff))
    self.log.msg.assert_has_calls([mock.call("yo", f1="v", a="b"),
                                   mock.call("goo", f1="v", d="c")])

def test_multiple_msg(self):
    """
    Multiple messages are logged when there are multiple log effects
    """
    eff = msg("yo", a='b').on(lambda _: msg("goo", d='c'))
    self.assertIsNone(sync_perform(self.disp, eff))
    self.log.msg.assert_has_calls(
        [mock.call("yo", f1='v', a='b'),
         mock.call("goo", f1='v', d='c')])

def trigger_convergence(tenant_id, group_id):
    """
    Trigger convergence on a scaling group
    """
    eff = mark_divergent(tenant_id, group_id)
    return eff.on(success=lambda _: msg("mark-dirty-success"),
                  error=log_and_raise("mark-dirty-failure"))

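# A minimal sketch of the success/error callback pattern used above. The
# effect library's Constant and Error intents stand in for the intent that
# mark_divergent() really returns; this is illustrative only. In older effect
# releases the error callback receives an exc_info tuple, in newer ones the
# exception itself, so the callbacks below ignore their argument.
from effect import Constant, Effect, Error, base_dispatcher, sync_perform

ok = Effect(Constant('dirty-flag-set')).on(
    success=lambda _: 'mark-dirty-success',
    error=lambda _: 'mark-dirty-failure')
assert sync_perform(base_dispatcher, ok) == 'mark-dirty-success'

boom = Effect(Error(RuntimeError('zookeeper down'))).on(
    success=lambda _: 'mark-dirty-success',
    error=lambda _: 'mark-dirty-failure')
assert sync_perform(base_dispatcher, boom) == 'mark-dirty-failure'
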
def test_nested_msg(self):
    """
    message is logged when nested inside other effects
    """
    eff = Effect(Constant("foo")).on(lambda _: msg("yo", a="b")).on(
        lambda _: Effect(Constant("goo")))
    self.assertEqual(sync_perform(self.disp, eff), "goo")
    self.log.msg.assert_called_once_with("yo", f1="v", a="b")

def test_msg(self):
    """
    message is logged with original field
    """
    r = sync_perform(self.disp, msg("yo!"))
    self.assertIsNone(r)
    self.log.msg.assert_called_once_with("yo!", f1='v')

def _execute_steps(steps):
    """
    Given a set of steps, executes them, logs the result, and returns the
    worst priority with a list of reasons for that result.

    :return: a tuple of (:class:`StepResult` constant,
                         list of :obj:`ErrorReason`)
    """
    if len(steps) > 0:
        results = yield steps_to_effect(steps)
        severity = [StepResult.FAILURE, StepResult.RETRY,
                    StepResult.LIMITED_RETRY, StepResult.SUCCESS]
        priority = sorted(results,
                          key=lambda (status, reasons): severity.index(status))
        worst_status = priority[0][0]
        results_to_log = [
            {'step': step,
             'result': result,
             'reasons': map(structure_reason, reasons)}
            for step, (result, reasons) in zip(steps, results)
        ]
        reasons = reduce(operator.add,
                         (x[1] for x in results if x[0] == worst_status))
    else:
        worst_status = StepResult.SUCCESS
        results_to_log = reasons = []

    yield msg('execute-convergence-results',
              results=results_to_log,
              worst_status=worst_status.name)
    yield do_return((worst_status, reasons))

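# A small worked example (made-up data, not from the codebase) of the
# worst-status selection above: results are sorted by the index of their
# status in the severity list, so FAILURE sorts before RETRY, which sorts
# before SUCCESS, and the first element of the sorted list carries the
# "worst" status. The real code uses a Python 2 tuple-unpacking lambda;
# this sketch indexes the tuple instead.
from functools import reduce
from operator import add

# stand-ins for the StepResult constants
FAILURE, RETRY, LIMITED_RETRY, SUCCESS = (
    'FAILURE', 'RETRY', 'LIMITED_RETRY', 'SUCCESS')
severity = [FAILURE, RETRY, LIMITED_RETRY, SUCCESS]

results = [(SUCCESS, ['created server']),
           (RETRY, ['server still building']),
           (RETRY, ['waiting on CLB'])]
priority = sorted(results, key=lambda result: severity.index(result[0]))
worst_status = priority[0][0]
reasons = reduce(add, (r[1] for r in results if r[0] == worst_status))
assert worst_status == RETRY
assert reasons == ['server still building', 'waiting on CLB']
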
def test_msg_with_params(self):
    """
    message is logged with its fields combined
    """
    r = sync_perform(self.disp, msg("yo!", a='b'))
    self.assertIsNone(r)
    self.log.msg.assert_called_once_with("yo!", f1='v', a='b')

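# A self-contained sketch (not the project's actual test fixture) of what the
# tests above assume: `self.disp` is a dispatcher that can perform the logging
# intents produced by msg()/err() with a bound field f1='v', and `self.log` is
# a mock whose calls are asserted on. The `Log` intent, the local msg() helper
# and get_log_dispatcher() below are illustrative stand-ins, not real names
# from the codebase.
import mock

from effect import (
    ComposedDispatcher, Effect, TypeDispatcher, base_dispatcher,
    sync_perform, sync_performer)


class Log(object):
    """Hypothetical logging intent: a message plus keyword fields."""
    def __init__(self, message, fields):
        self.message = message
        self.fields = fields


def msg(message, **fields):
    """Return an effect of a `Log` intent (stand-in for the real msg())."""
    return Effect(Log(message, fields))


def get_log_dispatcher(log, bound_fields):
    """Dispatcher that performs `Log` by calling ``log.msg`` with the bound
    fields merged into the intent's own fields."""
    @sync_performer
    def perform_log(dispatcher, intent):
        log.msg(intent.message, **dict(bound_fields, **intent.fields))
    return TypeDispatcher({Log: perform_log})


log = mock.Mock()
disp = ComposedDispatcher([get_log_dispatcher(log, {'f1': 'v'}),
                           base_dispatcher])
sync_perform(disp, msg("yo!", a='b'))
log.msg.assert_called_once_with("yo!", f1='v', a='b')
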
def _execute_steps(steps):
    """
    Given a set of steps, executes them, logs the result, and returns the
    worst priority with a list of reasons for that result.

    :return: a tuple of (:class:`StepResult` constant, list of reasons)
    """
    if len(steps) > 0:
        results = yield steps_to_effect(steps)
        severity = [
            StepResult.FAILURE, StepResult.RETRY,
            StepResult.LIMITED_RETRY, StepResult.SUCCESS
        ]
        priority = sorted(results,
                          key=lambda (status, reasons): severity.index(status))
        worst_status = priority[0][0]
        results_to_log = [{
            'step': step,
            'result': result,
            'reasons': map(structure_reason, reasons)
        } for step, (result, reasons) in zip(steps, results)]
        reasons = reduce(operator.add,
                         (x[1] for x in results if x[0] == worst_status))
    else:
        worst_status = StepResult.SUCCESS
        results_to_log = reasons = []

    yield msg('execute-convergence-results',
              results=results_to_log,
              worst_status=worst_status.name)
    yield do_return((worst_status, reasons))

def test_nested_msg(self):
    """
    message is logged when nested inside other effects
    """
    eff = Effect(Constant("foo")).on(lambda _: msg("yo", a='b')).on(
        lambda _: Effect(Constant("goo")))
    self.assertEqual(sync_perform(self.disp, eff), "goo")
    self.log.msg.assert_called_once_with("yo", f1='v', a='b')

def test_nested_boundfields(self):
    """
    BoundFields effects can be nested and the log effects internally
    will expand with all bound fields
    """
    eff = Effect(Constant("foo")).on(lambda _: msg("foo", m="d")).on(
        lambda _: Effect(Constant("goo")))
    e = Effect(Constant("abc")).on(lambda _: with_log(eff, i="a")).on(
        lambda _: Effect(Constant("def")))
    self.assertEqual(sync_perform(self.disp, with_log(e, o="f")), "def")
    self.log.msg.assert_called_once_with("foo", i="a", f1="v", m="d", o="f")

def conv_resume_group_eff(trans_id, group):
    """
    Resume scaling group of convergence enabled tenant
    """
    eff = parallel([
        Effect(ModifyGroupStatePaused(group, False)),
        mark_divergent(group.tenant_id, group.uuid).on(
            lambda _: msg("mark-dirty-success"))])
    return with_log(eff, transaction_id=trans_id, tenant_id=group.tenant_id,
                    scaling_group_id=group.uuid).on(lambda _: None)

def test_boundfields(self):
    """
    When an effect is wrapped in `BoundFields`, any logging effect inside
    is performed with the fields set up in `BoundFields`
    """
    f = object()
    eff = Effect(Constant("foo")).on(lambda _: err(f, "yo", a='b')).on(
        lambda _: msg("foo", m='d')).on(lambda _: Effect(Constant("goo")))
    eff = with_log(eff, bf='new')
    self.assertEqual(sync_perform(self.disp, eff), "goo")
    self.log.msg.assert_called_once_with("foo", f1='v', bf='new', m='d')
    self.log.err.assert_called_once_with(f, "yo", f1='v', bf='new', a='b')

def conv_resume_group_eff(trans_id, group):
    """
    Resume scaling group of convergence enabled tenant
    """
    eff = parallel([
        Effect(ModifyGroupStatePaused(group, False)),
        mark_divergent(group.tenant_id, group.uuid).on(
            lambda _: msg("mark-dirty-success"))
    ])
    return with_log(eff, transaction_id=trans_id, tenant_id=group.tenant_id,
                    scaling_group_id=group.uuid).on(lambda _: None)

def delete_divergent_flag(tenant_id, group_id, version):
    """
    Delete the dirty flag, if its version hasn't changed. See note
    [Divergent flags] for more info.

    :return: Effect of None.
    """
    flag = format_dirty_flag(tenant_id, group_id)
    path = CONVERGENCE_DIRTY_DIR + '/' + flag
    fields = dict(path=path, dirty_version=version)
    try:
        yield Effect(DeleteNode(path=path, version=version))
    except BadVersionError:
        # BadVersionError shouldn't be logged as an error because it's an
        # expected occurrence any time convergence is requested multiple times
        # rapidly.
        yield msg('mark-clean-skipped', **fields)
    except NoNodeError:
        yield msg('mark-clean-not-found', **fields)
    except Exception:
        yield err(None, 'mark-clean-failure', **fields)
    else:
        yield msg('mark-clean-success')

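# A minimal, self-contained sketch of the control flow delete_divergent_flag
# relies on: inside an effect.do generator, an exception raised while
# performing a yielded effect is thrown back into the generator, so it can be
# caught with an ordinary try/except. The Error intent (from the effect
# library) simply raises the given exception when performed; BadVersionError
# here is a local stand-in for the kazoo exception used above.
from effect import Effect, Error, base_dispatcher, sync_perform
from effect.do import do, do_return


class BadVersionError(Exception):
    """Stand-in for kazoo.exceptions.BadVersionError."""


@do
def delete_flag_sketch():
    try:
        # pretend this is Effect(DeleteNode(...)) and the version check fails
        yield Effect(Error(BadVersionError()))
    except BadVersionError:
        yield do_return('mark-clean-skipped')
    else:
        yield do_return('mark-clean-success')


result = sync_perform(base_dispatcher, delete_flag_sketch())
assert result == 'mark-clean-skipped'
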
def test_boundfields(self):
    """
    When an effect is wrapped in `BoundFields`, any logging effect inside
    is performed with the fields set up in `BoundFields`
    """
    f = object()
    eff = Effect(Constant("foo")).on(
        lambda _: err(f, "yo", a='b')).on(
            lambda _: msg("foo", m='d')).on(
                lambda _: Effect(Constant("goo")))
    eff = with_log(eff, bf='new')
    self.assertEqual(sync_perform(self.disp, eff), "goo")
    self.log.msg.assert_called_once_with("foo", f1='v', bf='new', m='d')
    self.log.err.assert_called_once_with(f, "yo", f1='v', bf='new', a='b')

def test_nested_boundfields(self):
    """
    BoundFields effects can be nested and the log effects internally
    will expand with all bound fields
    """
    eff = Effect(Constant("foo")).on(lambda _: msg("foo", m='d')).on(
        lambda _: Effect(Constant("goo")))
    e = Effect(Constant("abc")).on(lambda _: with_log(eff, i='a')).on(
        lambda _: Effect(Constant("def")))
    self.assertEqual(sync_perform(self.disp, with_log(e, o='f')), "def")
    self.log.msg.assert_called_once_with('foo', i='a', f1='v', m='d', o='f')

def converge(tenant_id, group_id, dirty_flag):
    stat = yield Effect(GetStat(dirty_flag))
    # If the node disappeared, ignore it. `stat` will be None here if the
    # divergent flag was discovered only after the group is removed from
    # currently_converging, but before the divergent flag is deleted, and
    # then the deletion happens, and then our GetStat happens. This
    # basically means it happens when one convergence is starting as
    # another one for the same group is ending.
    if stat is None:
        yield msg('converge-divergent-flag-disappeared', znode=dirty_flag)
    else:
        eff = converge_one_group(currently_converging, recently_converged,
                                 waiting, tenant_id, group_id, stat.version,
                                 build_timeout, limited_retry_iterations,
                                 step_limits)
        result = yield Effect(TenantScope(eff, tenant_id))
        yield do_return(result)

def check_and_trigger(tenant_id, group_id):
    """
    Trigger convergence on given group if it is ACTIVE and not paused
    """
    try:
        group, info = yield Effect(
            GetScalingGroupInfo(tenant_id=tenant_id, group_id=group_id))
    except NoSuchScalingGroupError:
        # Nothing to do if group has been deleted
        yield msg("selfheal-group-deleted",
                  tenant_id=tenant_id, scaling_group_id=group_id)
    else:
        state = info["state"]
        if state.status == ScalingGroupStatus.ACTIVE and (not state.paused):
            yield with_log(
                trigger_convergence(tenant_id, group_id),
                tenant_id=tenant_id, scaling_group_id=group_id)

def check_and_trigger(tenant_id, group_id):
    """
    Trigger convergence on given group if it is ACTIVE and not paused
    """
    try:
        group, info = yield Effect(
            GetScalingGroupInfo(tenant_id=tenant_id, group_id=group_id))
    except NoSuchScalingGroupError:
        # Nothing to do if group has been deleted
        yield msg("selfheal-group-deleted",
                  tenant_id=tenant_id, scaling_group_id=group_id)
    else:
        state = info["state"]
        if (state.status == ScalingGroupStatus.ACTIVE and
                not (state.paused or state.suspended)):
            yield with_log(trigger_convergence(tenant_id, group_id),
                           tenant_id=tenant_id, scaling_group_id=group_id)

def converge_all_groups(currently_converging, recently_converged, waiting,
                        my_buckets, all_buckets, divergent_flags,
                        build_timeout, interval, limited_retry_iterations,
                        step_limits,
                        converge_one_group=converge_one_group):
    """
    Check for groups that need convergence and which match up to the buckets
    we've been allocated.

    :param Reference currently_converging: pset of currently converging groups
    :param Reference recently_converged: pmap of group ID to time last
        convergence finished
    :param Reference waiting: pmap of group ID to number of iterations already
        waited
    :param my_buckets: The buckets that should be checked for group IDs to
        converge on.
    :param all_buckets: The set of all buckets that can be checked for group
        IDs to converge on. ``my_buckets`` should be a subset of this.
    :param divergent_flags: divergent flags that were found in zookeeper.
    :param number build_timeout: number of seconds to wait for servers to be
        in building before they are timed out and deleted
    :param number interval: number of seconds between attempts at convergence.
        Groups will not be converged if less than this amount of time has
        passed since the end of their last convergence.
    :param int limited_retry_iterations: number of iterations to wait for
        LIMITED_RETRY steps
    :param dict step_limits: Mapping of step class to number of executions
        allowed in a convergence cycle
    :param callable converge_one_group: function to use to converge a single
        group - to be used for test injection only
    """
    group_infos = get_my_divergent_groups(my_buckets, all_buckets,
                                          divergent_flags)
    # filter out currently converging groups
    cc = yield currently_converging.read()
    group_infos = [info for info in group_infos if info['group_id'] not in cc]
    if not group_infos:
        return
    yield msg('converge-all-groups', group_infos=group_infos,
              currently_converging=list(cc))

    @do
    def converge(tenant_id, group_id, dirty_flag):
        stat = yield Effect(GetStat(dirty_flag))
        # If the node disappeared, ignore it. `stat` will be None here if the
        # divergent flag was discovered only after the group is removed from
        # currently_converging, but before the divergent flag is deleted, and
        # then the deletion happens, and then our GetStat happens. This
        # basically means it happens when one convergence is starting as
        # another one for the same group is ending.
        if stat is None:
            yield msg('converge-divergent-flag-disappeared', znode=dirty_flag)
        else:
            eff = converge_one_group(currently_converging, recently_converged,
                                     waiting, tenant_id, group_id,
                                     stat.version, build_timeout,
                                     limited_retry_iterations, step_limits)
            result = yield Effect(TenantScope(eff, tenant_id))
            yield do_return(result)

    recent_groups = yield get_recently_converged_groups(
        recently_converged, interval)
    effs = []
    for info in group_infos:
        tenant_id, group_id = info['tenant_id'], info['group_id']
        if group_id in recent_groups:
            # Don't converge a group if it has recently been converged.
            continue
        eff = converge(tenant_id, group_id, info['dirty-flag'])
        effs.append(
            with_log(eff, tenant_id=tenant_id, scaling_group_id=group_id))
    yield do_return(parallel(effs))

def execute_convergence(tenant_id, group_id, build_timeout, waiting,
                        limited_retry_iterations, step_limits,
                        get_executor=get_executor):
    """
    Gather data, plan a convergence, save active and pending servers to the
    group state, and then execute the convergence.

    :param str tenant_id: the tenant ID for the group to converge
    :param str group_id: the ID of the group to be converged
    :param number build_timeout: number of seconds to wait for servers to be
        in building before they are timed out and deleted
    :param Reference waiting: pmap of waiting groups
    :param int limited_retry_iterations: number of iterations to wait for
        LIMITED_RETRY steps
    :param dict step_limits: Mapping of step class to number of executions
        allowed in a convergence cycle
    :param callable get_executor: like :func:`get_executor`, used for testing.

    :return: Effect of :obj:`ConvergenceIterationStatus`.
    :raise: :obj:`NoSuchScalingGroupError` if the group doesn't exist.
    """
    clean_waiting = _clean_waiting(waiting, group_id)

    # Gather data
    yield msg("begin-convergence")
    now_dt = yield Effect(Func(datetime.utcnow))
    all_data = yield msg_with_time(
        "gather-convergence-data",
        convergence_exec_data(tenant_id, group_id, now_dt,
                              get_executor=get_executor))
    (executor, scaling_group, group_state, desired_group_state,
     resources) = all_data

    # prepare plan
    steps = executor.plan(desired_group_state, datetime_to_epoch(now_dt),
                          build_timeout, step_limits, **resources)
    yield log_steps(steps)

    # Execute plan
    yield msg('execute-convergence', steps=steps, now=now_dt,
              desired=desired_group_state, **resources)
    worst_status, reasons = yield _execute_steps(steps)

    if worst_status != StepResult.LIMITED_RETRY:
        # If we're not waiting any more, there's no point in keeping track of
        # the group
        yield clean_waiting

    # Handle the status from execution
    if worst_status == StepResult.SUCCESS:
        result = yield convergence_succeeded(executor, scaling_group,
                                             group_state, resources, now_dt)
    elif worst_status == StepResult.FAILURE:
        result = yield convergence_failed(scaling_group, reasons)
    elif worst_status is StepResult.LIMITED_RETRY:
        # We allow further iterations to proceed as long as we haven't been
        # waiting for a LIMITED_RETRY for N consecutive iterations.
        current_iterations = (yield waiting.read()).get(group_id, 0)
        if current_iterations > limited_retry_iterations:
            yield msg('converge-limited-retry-too-long')
            yield clean_waiting
            # Prefix "Timed out" to all limited retry reasons
            result = yield convergence_failed(scaling_group, reasons, True)
        else:
            yield waiting.modify(
                lambda group_iterations: group_iterations.set(
                    group_id, current_iterations + 1))
            result = ConvergenceIterationStatus.Continue()
    else:
        result = ConvergenceIterationStatus.Continue()
    yield do_return(result)

def execute_convergence(tenant_id, group_id, build_timeout, waiting,
                        limited_retry_iterations,
                        get_executor=get_executor):
    """
    Gather data, plan a convergence, save active and pending servers to the
    group state, and then execute the convergence.

    :param str tenant_id: the tenant ID for the group to converge
    :param str group_id: the ID of the group to be converged
    :param number build_timeout: number of seconds to wait for servers to be
        in building before they are timed out and deleted
    :param Reference waiting: pmap of waiting groups
    :param int limited_retry_iterations: number of iterations to wait for
        LIMITED_RETRY steps
    :param callable get_executor: like :func:`get_executor`, used for testing.

    :return: Effect of :obj:`ConvergenceIterationStatus`.
    :raise: :obj:`NoSuchScalingGroupError` if the group doesn't exist.
    """
    clean_waiting = _clean_waiting(waiting, group_id)

    # Gather data
    yield msg("begin-convergence")
    now_dt = yield Effect(Func(datetime.utcnow))
    all_data = yield msg_with_time(
        "gather-convergence-data",
        convergence_exec_data(tenant_id, group_id, now_dt,
                              get_executor=get_executor))
    (executor, scaling_group, group_state, desired_group_state,
     resources) = all_data

    # prepare plan
    steps = executor.plan(desired_group_state, datetime_to_epoch(now_dt),
                          build_timeout, **resources)
    yield log_steps(steps)

    # Execute plan
    yield msg('execute-convergence', steps=steps, now=now_dt,
              desired=desired_group_state, **resources)
    worst_status, reasons = yield _execute_steps(steps)

    if worst_status != StepResult.LIMITED_RETRY:
        # If we're not waiting any more, there's no point in keeping track of
        # the group
        yield clean_waiting

    # Handle the status from execution
    if worst_status == StepResult.SUCCESS:
        result = yield convergence_succeeded(
            executor, scaling_group, group_state, resources, now_dt)
    elif worst_status == StepResult.FAILURE:
        result = yield convergence_failed(scaling_group, reasons)
    elif worst_status is StepResult.LIMITED_RETRY:
        # We allow further iterations to proceed as long as we haven't been
        # waiting for a LIMITED_RETRY for N consecutive iterations.
        current_iterations = (yield waiting.read()).get(group_id, 0)
        if current_iterations > limited_retry_iterations:
            yield msg('converge-limited-retry-too-long')
            yield clean_waiting
            result = yield convergence_failed(scaling_group, reasons)
        else:
            yield waiting.modify(
                lambda group_iterations: group_iterations.set(
                    group_id, current_iterations + 1))
            result = ConvergenceIterationStatus.Continue()
    else:
        result = ConvergenceIterationStatus.Continue()
    yield do_return(result)

def execute_convergence(tenant_id, group_id, build_timeout, waiting,
                        limited_retry_iterations, step_limits,
                        get_executor=get_executor):
    """
    Gather data, plan a convergence, save active and pending servers to the
    group state, and then execute the convergence.

    :param str tenant_id: the tenant ID for the group to converge
    :param str group_id: the ID of the group to be converged
    :param number build_timeout: number of seconds to wait for servers to be
        in building before they are timed out and deleted
    :param Reference waiting: pmap of waiting groups
    :param int limited_retry_iterations: number of iterations to wait for
        LIMITED_RETRY steps
    :param dict step_limits: Mapping of step class to number of executions
        allowed in a convergence cycle
    :param callable get_executor: like :func:`get_executor`, used for testing.

    :return: Effect of :obj:`ConvergenceIterationStatus`.
    :raise: :obj:`NoSuchScalingGroupError` if the group doesn't exist.
    """
    clean_waiting = _clean_waiting(waiting, group_id)

    # Begin convergence by updating group status to ACTIVE
    yield msg("begin-convergence")
    try:
        yield Effect(LoadAndUpdateGroupStatus(tenant_id, group_id,
                                              ScalingGroupStatus.ACTIVE))
    except NoSuchScalingGroupError:
        # Expected for DELETING group. Ignore.
        pass

    # Gather data
    now_dt = yield Effect(Func(datetime.utcnow))
    try:
        all_data = yield msg_with_time(
            "gather-convergence-data",
            convergence_exec_data(tenant_id, group_id, now_dt,
                                  get_executor=get_executor))
        (executor, scaling_group, group_state, desired_group_state,
         resources) = all_data
    except FirstError as fe:
        if fe.exc_info[0] is NoSuchEndpoint:
            result = yield convergence_failed(
                tenant_id, group_id, [ErrorReason.Exception(fe.exc_info)])
            yield do_return(result)
        raise fe

    # prepare plan
    steps = executor.plan(desired_group_state, datetime_to_epoch(now_dt),
                          build_timeout, step_limits, **resources)
    yield log_steps(steps)

    # Execute plan
    yield msg('execute-convergence', steps=steps, now=now_dt,
              desired=desired_group_state, **resources)
    worst_status, reasons = yield _execute_steps(steps)

    if worst_status != StepResult.LIMITED_RETRY:
        # If we're not waiting any more, there's no point in keeping track of
        # the group
        yield clean_waiting

    # Handle the status from execution
    if worst_status == StepResult.SUCCESS:
        result = yield convergence_succeeded(
            executor, scaling_group, group_state, resources)
    elif worst_status == StepResult.FAILURE:
        result = yield convergence_failed(tenant_id, group_id, reasons)
    elif worst_status is StepResult.LIMITED_RETRY:
        # We allow further iterations to proceed as long as we haven't been
        # waiting for a LIMITED_RETRY for N consecutive iterations.
        current_iterations = (yield waiting.read()).get(group_id, 0)
        if current_iterations > limited_retry_iterations:
            yield msg('converge-limited-retry-too-long')
            yield clean_waiting
            # Prefix "Timed out" to all limited retry reasons
            result = yield convergence_failed(tenant_id, group_id, reasons,
                                              True)
        else:
            yield waiting.modify(
                lambda group_iterations: group_iterations.set(
                    group_id, current_iterations + 1))
            result = ConvergenceIterationStatus.Continue()
    else:
        result = ConvergenceIterationStatus.Continue()
    yield do_return(result)

def converge_all_groups(
        currently_converging, recently_converged, waiting,
        my_buckets, all_buckets, divergent_flags, build_timeout, interval,
        limited_retry_iterations, step_limits,
        converge_one_group=converge_one_group):
    """
    Check for groups that need convergence and which match up to the buckets
    we've been allocated.

    :param Reference currently_converging: pset of currently converging groups
    :param Reference recently_converged: pmap of group ID to time last
        convergence finished
    :param Reference waiting: pmap of group ID to number of iterations already
        waited
    :param my_buckets: The buckets that should be checked for group IDs to
        converge on.
    :param all_buckets: The set of all buckets that can be checked for group
        IDs to converge on. ``my_buckets`` should be a subset of this.
    :param divergent_flags: divergent flags that were found in zookeeper.
    :param number build_timeout: number of seconds to wait for servers to be
        in building before they are timed out and deleted
    :param number interval: number of seconds between attempts at convergence.
        Groups will not be converged if less than this amount of time has
        passed since the end of their last convergence.
    :param int limited_retry_iterations: number of iterations to wait for
        LIMITED_RETRY steps
    :param dict step_limits: Mapping of step class to number of executions
        allowed in a convergence cycle
    :param callable converge_one_group: function to use to converge a single
        group - to be used for test injection only
    """
    group_infos = get_my_divergent_groups(
        my_buckets, all_buckets, divergent_flags)
    # filter out currently converging groups
    cc = yield currently_converging.read()
    group_infos = [info for info in group_infos if info['group_id'] not in cc]
    if not group_infos:
        return
    yield msg('converge-all-groups', group_infos=group_infos,
              currently_converging=list(cc))

    @do
    def converge(tenant_id, group_id, dirty_flag):
        stat = yield Effect(GetStat(dirty_flag))
        # If the node disappeared, ignore it. `stat` will be None here if the
        # divergent flag was discovered only after the group is removed from
        # currently_converging, but before the divergent flag is deleted, and
        # then the deletion happens, and then our GetStat happens. This
        # basically means it happens when one convergence is starting as
        # another one for the same group is ending.
        if stat is None:
            yield msg('converge-divergent-flag-disappeared', znode=dirty_flag)
        else:
            eff = converge_one_group(currently_converging, recently_converged,
                                     waiting, tenant_id, group_id,
                                     stat.version, build_timeout,
                                     limited_retry_iterations, step_limits)
            result = yield Effect(TenantScope(eff, tenant_id))
            yield do_return(result)

    recent_groups = yield get_recently_converged_groups(recently_converged,
                                                        interval)
    effs = []
    for info in group_infos:
        tenant_id, group_id = info['tenant_id'], info['group_id']
        if group_id in recent_groups:
            # Don't converge a group if it has recently been converged.
            continue
        eff = converge(tenant_id, group_id, info['dirty-flag'])
        effs.append(
            with_log(eff, tenant_id=tenant_id, scaling_group_id=group_id))
    yield do_return(parallel(effs))