Ejemplo n.º 1
0
def get_background_pools(experiment: Experiment) -> tuple:
    """
    Create the thread pools used to run background activities.

    The `method` and `rollbacks` sections of the experiment are handled
    independently. For each of them, a pool is created with as many workers
    as there are entries declaring a truthy `background` flag.

    Returns a pair `(activity_pool, rollback_pool)`. A member of the pair is
    `None` when its section declares no background entry.
    """
    def make_pool(section: str, message: str):
        # falsy entries (such as `None`) are tolerated and just not counted
        count = sum(
            1 for activity in experiment.get(section, [])
            if activity and activity.get("background"))
        if not count:
            return None

        logger.debug(message.format(c=count))
        return ThreadPoolExecutor(count)

    activity_pool = make_pool(
        "method", "{c} activities will be run in the background")
    rollback_pool = make_pool(
        "rollbacks", "{c} rollbacks will be run in the background")

    return activity_pool, rollback_pool
Ejemplo n.º 2
0
def get_all_activities(experiment: Experiment) -> List[Activity]:
    """
    Return a new list holding, in that order, the probes of the steady-state
    hypothesis, the activities of the method and those of the rollbacks.

    Missing sections contribute nothing to the result.
    """
    hypothesis = experiment.get("steady-state-hypothesis", {})
    return [
        *hypothesis.get("probes", []),
        *experiment.get("method", []),
        *experiment.get("rollbacks", []),
    ]
Ejemplo n.º 3
0
def get_controls(experiment: Experiment) -> List[Control]:
    """
    Collect every control declared in the experiment into a single new list.

    Controls are gathered from the experiment itself first, then from the
    steady-state hypothesis and finally from each activity returned by
    `get_all_activities`.
    """
    found = list(experiment.get("controls", []))
    found += experiment.get("steady-state-hypothesis", {}).get("controls", [])

    for activity in get_all_activities(experiment):
        found += activity.get("controls", [])

    return found
Ejemplo n.º 4
0
 def configure(self, experiment: Experiment, settings: Settings,
               experiment_vars: Dict[str, Any]) -> None:
     """
     Populate `self.settings`, `self.config` and `self.secrets`.

     `experiment_vars`, when truthy, is unpacked into a pair of
     configuration variables and secret variables; otherwise both are
     `None`. When `settings` is `None`, the already loaded settings are
     used instead. The secrets are loaded after, and with, the
     configuration.
     """
     if experiment_vars:
         config_vars, secret_vars = experiment_vars
     else:
         config_vars, secret_vars = None, None

     if settings is None:
         settings = get_loaded_settings()
     self.settings = settings

     declared_config = experiment.get("configuration", {})
     self.config = load_configuration(declared_config, config_vars)

     declared_secrets = experiment.get("secrets", {})
     self.secrets = load_secrets(declared_secrets, self.config, secret_vars)
Ejemplo n.º 5
0
def cache_activities(experiment: Experiment) -> None:
    """
    Cache all named activities into a map so we can quickly lookup ref.

    The activities of the method are registered first, followed by the
    probes of the steady-state hypothesis, so a probe replaces a method
    activity carrying the same name. Activities without a name are not
    cached.

    The module-level cache is updated in place and nothing is returned.
    """
    logger.debug("Building activity cache...")

    method = experiment.get("method", [])
    probes = experiment.get("steady-state-hypothesis", {}).get("probes", [])

    for activity in method + probes:
        name = activity.get("name")
        if name:
            _cache[name] = activity

    # this is the overall size of the cache, not only what this call added
    logger.debug("Cached {d} activities".format(d=len(_cache)))
Ejemplo n.º 6
0
def run_rollbacks(experiment: Experiment,
                  configuration: Configuration,
                  secrets: Secrets,
                  pool: ThreadPoolExecutor,
                  dry: bool = False) -> Iterator[Run]:
    """
    Generator going through the rollbacks of the experiment in their declared
    order.

    A rollback flagged with `background` is submitted to the `pool` and the
    value returned by that submission is yielded. Any other rollback is
    executed right away and its run is yielded.
    """
    declared = experiment.get("rollbacks", [])
    if not declared:
        logger.info("No declared rollbacks, let's move on.")

    for activity in declared:
        logger.info("Rollback: {t}".format(t=activity.get("name")))

        if not activity.get("background"):
            yield execute_activity(experiment,
                                   activity,
                                   configuration=configuration,
                                   secrets=secrets,
                                   dry=dry)
            continue

        logger.debug("rollback activity will run in the background")
        yield pool.submit(execute_activity,
                          experiment=experiment,
                          activity=activity,
                          configuration=configuration,
                          secrets=secrets,
                          dry=dry)
Ejemplo n.º 7
0
def has_steady_state_hypothesis_with_probes(experiment: Experiment) -> bool:
    """
    Tell whether the experiment declares a steady-state hypothesis holding at
    least one probe.
    """
    hypothesis = experiment.get("steady-state-hypothesis")
    if not hypothesis:
        return False

    probes = hypothesis.get("probes")
    if not probes:
        return False

    return len(probes) > 0
Ejemplo n.º 8
0
def run_activities(experiment: Experiment,
                   configuration: Configuration,
                   secrets: Secrets,
                   pool: ThreadPoolExecutor,
                   dry: bool = False) -> Iterator[Run]:
    """
    Internal generator going through the activities of the method, in their
    declared order.

    An activity flagged with `background` is submitted to the `pool` and the
    value returned by that submission is yielded. Any other activity is
    executed right away and its run is yielded.
    """
    activities = experiment.get("method", [])
    if not activities:
        logger.info("No declared activities, let's move on.")

    for activity in activities:
        call_args = {
            "experiment": experiment,
            "activity": activity,
            "configuration": configuration,
            "secrets": secrets,
            "dry": dry,
        }

        if activity.get("background"):
            logger.debug("activity will run in the background")
            yield pool.submit(execute_activity, **call_args)
        else:
            yield execute_activity(**call_args)
Ejemplo n.º 9
0
def ensure_hypothesis_is_valid(experiment: Experiment):
    """
    Validates that the steady state hypothesis entry has the expected schema
    or raises :exc:`InvalidExperiment` or :exc:`InvalidActivity`.

    An experiment without a hypothesis is considered valid. When there is
    one, it must have a title and each of its probes must be a valid activity
    declaring a `tolerance` entry of a supported type.
    """
    hypo = experiment.get("steady-state-hypothesis")
    if hypo is None:
        return

    if not hypo.get("title"):
        raise InvalidExperiment("hypothesis requires a title")

    for probe in hypo.get("probes") or []:
        ensure_activity_is_valid(probe)

        if "tolerance" not in probe:
            raise InvalidActivity(
                "hypothesis probe must have a tolerance entry")

        if not isinstance(probe["tolerance"], (
                bool, int, list, str, dict)):
            raise InvalidActivity(
                "hypothesis probe tolerance must either be an integer, "
                "a string, a boolean or a pair of values for boundaries. "
                "It can also be a dictionary which is a probe activity "
                "definition that takes an argument called `value` with "
                "the value of the probe itself to be validated")

        # NOTE(review): this loop used to end by validating `probe` a second
        # time when it was a dictionary, repeating the check made at the top
        # of the iteration. If the intent was to validate a dictionary
        # tolerance as an activity, that needs to be added deliberately.
Ejemplo n.º 10
0
def get_all_activities_in_experiment(experiment: Experiment) -> List[Activity]:
    """
    Gather every activity of the experiment into one new list: the probes of
    the steady-state hypothesis (when there is one), then the method's
    activities and finally the rollbacks.

    Convenient whenever all the activities must be iterated over.
    """
    hypothesis = experiment.get("steady-state-hypothesis")
    probes = hypothesis.get("probes", []) if hypothesis else []

    collected = list(probes)
    collected += experiment.get("method", [])
    collected += experiment.get("rollbacks", [])
    return collected
Ejemplo n.º 11
0
def run_steady_state_hypothesis(experiment: Experiment,
                                configuration: Configuration, secrets: Secrets,
                                dry: bool = False):
    """
    Run all probes in the hypothesis and fail the experiment as soon as any of
    the probe fails or is outside the tolerance zone.

    Returns `None` when the experiment has no steady-state hypothesis.
    Otherwise, returns a dictionary with two keys:

    * `steady_state_met`: `True` when all probes were run and validated,
      `False` as soon as one of them failed or was out of tolerance
    * `probes`: the runs of the probes that were executed, each of them
      carrying a `tolerance_met` flag

    In `dry` mode, probes are executed but their tolerance is not checked.
    """
    state = {
        "steady_state_met": None,
        "probes": []
    }
    hypo = experiment.get("steady-state-hypothesis")
    if not hypo:
        logger.info(
            "No steady state hypothesis defined. That's ok, just exploring.")
        return

    logger.info("Steady state hypothesis: {h}".format(h=hypo.get("title")))

    with controls(level="hypothesis", experiment=experiment, context=hypo,
                  configuration=configuration, secrets=secrets) as control:
        probes = hypo.get("probes", [])
        # the state is handed over to the controls before it gets populated,
        # it is the same object that is updated and returned below
        control.with_state(state)

        for activity in probes:
            run = execute_activity(
                experiment=experiment, activity=activity,
                configuration=configuration, secrets=secrets, dry=dry)

            state["probes"].append(run)

            if run["status"] == "failed":
                run["tolerance_met"] = False
                state["steady_state_met"] = False
                # `warn` is a deprecated alias of `warning`
                logger.warning("Probe terminated unexpectedly, "
                               "so its tolerance could not be validated")
                return state

            run["tolerance_met"] = True

            if dry:
                # do not check for tolerance when dry mode is on
                continue

            tolerance = activity.get("tolerance")
            logger.debug("allowed tolerance is {t}".format(t=str(tolerance)))
            checked = within_tolerance(
                tolerance, run["output"], configuration=configuration,
                secrets=secrets)
            if not checked:
                run["tolerance_met"] = False
                state["steady_state_met"] = False
                return state

        state["steady_state_met"] = True
        logger.info("Steady state hypothesis is met!")

    return state
Ejemplo n.º 12
0
def before_experiment_control(context: Experiment, **kwargs):
    """
    Start a tracing span, named after the experiment's title, when the
    execution of the experiment begins.

    The span is kept on the tracer as `experiment_span`. It is tagged with
    the experiment's tags and contributions, when declared, and any extra
    keyword argument is logged against it.
    """
    tracer = local.tracer
    span = tracer.start_span(context.get("title"))
    tracer.experiment_span = span

    span.set_tag('type', 'experiment')

    tags = context.get("tags")
    if tags:
        span.set_tag('target', ', '.join(tags))

    contributions = context.get("contributions") or {}
    for key in contributions:
        span.set_tag(key, contributions[key])

    if kwargs:
        span.log_kv(kwargs)
def add_contribution_model(experiment: Experiment):
    """
    Bubble the contributions up to the experiment so they can be rendered in
    the report.

    Contributions are declared by an extension. The first non-empty
    `contributions` entry found amongst the experiment's extensions is set,
    as is, on the experiment under the `contributions` key. The experiment is
    left untouched when no extension declares any.
    """
    declared = (
        extension.get("contributions")
        for extension in experiment.get("extensions", [])
    )
    first_found = next((entry for entry in declared if entry), None)

    if first_found:
        experiment["contributions"] = first_found
Ejemplo n.º 14
0
def validate_extensions(experiment: Experiment):
    """
    Check that every extension of the experiment carries a name which is not
    empty nor blank, and raise :exc:`InvalidExperiment` otherwise.

    An experiment without extensions is valid.
    """
    for extension in experiment.get("extensions") or []:
        label = extension.get("name")
        if not (label and label.strip()):
            raise InvalidExperiment("All extensions require a non-empty name")
Ejemplo n.º 15
0
def get_context_controls(
    level: str,
    experiment: Experiment = None,  # noqa: C901
    context: Union[Activity, Experiment] = None,
) -> List[Control]:
    """
    Get the controls at the given level by merging those declared at the
    experiment level with the current context's and the global ones.

    The outcome depends on what is declared:

    * without an experiment, only the global controls are returned
    * when the experiment declares no controls, the context's controls
      followed by the global controls are returned
    * when neither the context nor the global level declare any control, or
      when the level is `method` or `rollback`, copies of the experiment's
      automatic controls are returned
    * otherwise, the context's and global controls are returned, completed
      with copies of experiment's controls as described in the loop below

    A control of the experiment is automatic unless its `automatic` property
    is set to a falsy value. The context's own list is never modified.
    """
    glbl_controls = get_global_controls()
    if not experiment:
        return glbl_controls

    top_level_controls = experiment.get("controls", [])
    controls = copy(context.get("controls", []))
    controls.extend(glbl_controls)

    # do we even have something at the top level to be merged?
    if not top_level_controls:
        return controls

    if not controls or level in ["method", "rollback"]:
        return [
            deepcopy(c) for c in top_level_controls
            if c.get("automatic", True)
        ]

    # Go through a snapshot: the list is appended to in the loop and we must
    # not revisit the entries we have just added while iterating.
    for c in list(controls):
        if "ref" in c:
            # a reference pulls the experiment's control it names, whether
            # that control is automatic or not
            for top_level_control in top_level_controls:
                if c["ref"] == top_level_control["name"]:
                    controls.append(deepcopy(top_level_control))
                    break
        else:
            for tc in top_level_controls:
                if c.get("name") == tc.get("name"):
                    break
            else:
                # NOTE(review): no control of the experiment shares that
                # name, so `tc` is the last control of the experiment here
                # and only that one gets appended. Confirm this is intended.
                if tc.get("automatic", True):
                    controls.append(deepcopy(tc))

    return controls
Ejemplo n.º 16
0
def get_extension(experiment: Experiment, name: str) -> Optional[Extension]:
    """
    Look up an extension of the experiment by its name.

    The first extension with that name is returned. `None` is returned when
    the experiment has no extensions or when none of them has that name.
    """
    extensions = experiment.get("extensions")
    if not extensions:
        return None

    matching = (ext for ext in extensions if ext.get("name") == name)
    return next(matching, None)
Ejemplo n.º 17
0
def apply_python_control(
    level: str,
    control: Control,  # noqa: C901
    experiment: Experiment,
    context: Union[Activity, Experiment],
    state: Union[Journal, Run, List[Run]] = None,
    configuration: Configuration = None,
    secrets: Secrets = None,
    settings: Settings = None,
):
    """
    Call the function of the control which is mapped to the given level, if
    there is one.

    The function receives the `context` along with a copy of the arguments
    declared by the control's provider, substituted when a configuration or
    secrets are given. On top of those, the function is also passed any of
    `secrets`, `configuration`, `state`, `experiment`, `extensions` and
    `settings` that its signature declares.
    """
    provider = control["provider"]
    func = load_func(control, _level_mapping.get(level))
    if not func:
        return

    arguments = deepcopy(provider.get("arguments", {}))
    if configuration or secrets:
        arguments = substitute(arguments, configuration, secrets)

    accepted = inspect.signature(func).parameters

    # values are only computed for the parameters the function asks for
    injectable = (
        ("secrets", lambda: secrets),
        ("configuration", lambda: configuration),
        ("state", lambda: state),
        ("experiment", lambda: experiment),
        ("extensions", lambda: experiment.get("extensions")),
        ("settings", lambda: settings),
    )
    for name, supply in injectable:
        if name in accepted:
            arguments[name] = supply()

    func(context=context, **arguments)
Ejemplo n.º 18
0
def get_context_controls(level: str, experiment: Experiment,
                         context: Union[Activity, Experiment]) \
                         -> List[Control]:
    """
    Get the controls at the given level by merging those declared at the
    experiment level with the current context's.

    The outcome depends on what is declared:

    * when the context declares no controls, or when the level is `method`
      or `rollback`, copies of the experiment's automatic controls are
      returned
    * otherwise, the context's controls are returned, completed with copies
      of experiment's controls as described in the loop below

    A control of the experiment is automatic unless its `automatic` property
    is set to a falsy value. The returned list is always a new one: the list
    held by the context is never modified.
    """
    top_level_controls = experiment.get("controls", [])

    # Work on our own list. Appending to the context's list would make the
    # declaration grow each time the controls are requested for it.
    controls = list(context.get("controls", []))

    if not controls or level in ["method", "rollback"]:
        return [
            deepcopy(c)
            for c in top_level_controls
            if c.get("automatic", True)
        ]

    # go through a snapshot as the list is appended to in the loop
    for c in list(controls):
        if "ref" in c:
            # a reference pulls the experiment's control it names, whether
            # that control is automatic or not
            for top_level_control in top_level_controls:
                if c["ref"] == top_level_control["name"]:
                    controls.append(deepcopy(top_level_control))
                    break
        else:
            for tc in top_level_controls:
                if c.get("name") == tc.get("name"):
                    break
            else:
                # NOTE(review): no control of the experiment shares that
                # name, so `tc` is the last control of the experiment here
                # and only that one gets appended. Confirm this is intended.
                if tc.get("automatic", True):
                    controls.append(deepcopy(tc))

    return controls
Ejemplo n.º 19
0
def ensure_hypothesis_is_valid(experiment: Experiment):
    """
    Check the schema of the steady state hypothesis entry and raise
    :exc:`InvalidExperiment` or :exc:`InvalidActivity` when it is not as
    expected.

    An experiment without a hypothesis is valid. When there is one, it must
    have a title and each of its probes must be a valid activity declaring a
    valid `tolerance` entry.
    """
    hypothesis = experiment.get("steady-state-hypothesis")
    if hypothesis is None:
        return

    title = hypothesis.get("title")
    if not title:
        raise InvalidExperiment("hypothesis requires a title")

    for probe in hypothesis.get("probes") or ():
        ensure_activity_is_valid(probe)

        if "tolerance" in probe:
            ensure_hypothesis_tolerance_is_valid(probe["tolerance"])
            continue

        raise InvalidActivity("hypothesis probe must have a tolerance entry")
Ejemplo n.º 20
0
def run_steady_state_hypothesis(experiment: Experiment,
                                configuration: Configuration, secrets: Secrets,
                                dry: bool = False):
    """
    Execute the probes of the hypothesis one after the other and stop at the
    first one whose output is outside its tolerance.

    Returns `None` when the experiment has no steady-state hypothesis.
    Otherwise, returns a dictionary with the `steady_state_met` flag and the
    runs of the executed `probes`, each carrying a `tolerance_met` flag.

    In `dry` mode, probes are executed but their tolerance is not checked.
    """
    hypothesis = experiment.get("steady-state-hypothesis")
    if not hypothesis:
        logger.info(
            "No steady state hypothesis defined. That's ok, just exploring.")
        return

    outcome = {"steady_state_met": None, "probes": []}
    logger.info(
        "Steady state hypothesis: {h}".format(h=hypothesis.get("title")))

    for probe in hypothesis.get("probes", []):
        run = execute_activity(
            probe, configuration=configuration, secrets=secrets, dry=dry)
        run["tolerance_met"] = True
        outcome["probes"].append(run)

        # the tolerance is left unchecked when dry mode is on
        if dry:
            continue

        tolerance = probe.get("tolerance")
        logger.debug("allowed tolerance is {t}".format(t=str(tolerance)))
        if within_tolerance(tolerance, run["output"]):
            continue

        run["tolerance_met"] = False
        outcome["steady_state_met"] = False
        return outcome

    outcome["steady_state_met"] = True
    logger.info("Steady state hypothesis is met!")

    return outcome
Ejemplo n.º 21
0
def warn_about_deprecated_features(experiment: Experiment):
    """
    Warn about deprecated features.

    All checks are made from this single place so that, for a given call,
    each deprecated feature is reported at most once, however many times the
    experiment relies on it. Each report is both a :exc:`DeprecationWarning`
    and a logged warning.

    The following are reported:

    * a `process` provider which declares its arguments as a dictionary
    * a `vault` secret which declares a `key` but no `path`
    """
    warned_deprecations = {
        DeprecatedDictArgsMessage: False,
        DeprecatedVaultMissingPathMessage: False
    }
    activities = get_all_activities_in_experiment(experiment)

    for activity in activities:
        provider = activity.get("provider")
        if not provider:
            continue

        provider_type = provider.get("type")
        if provider_type == "process":
            arguments = provider.get("arguments")
            if not warned_deprecations[DeprecatedDictArgsMessage] and \
                    isinstance(arguments, dict):
                warned_deprecations[DeprecatedDictArgsMessage] = True
                warnings.warn(DeprecatedDictArgsMessage, DeprecationWarning)
                logger.warning(DeprecatedDictArgsMessage)

    # vault now expects the path property
    # see https://github.com/chaostoolkit/chaostoolkit-lib/issues/77
    for keys in experiment.get("secrets", {}).values():
        for value in keys.values():
            if not (isinstance(value, dict) and value.get("type") == "vault"):
                continue

            if "key" not in value or "path" in value:
                continue

            # honour the flag, like the process check above does, so that
            # several offending secrets do not repeat the same message
            if warned_deprecations[DeprecatedVaultMissingPathMessage]:
                continue

            warned_deprecations[DeprecatedVaultMissingPathMessage] = True
            warnings.warn(DeprecatedVaultMissingPathMessage,
                          DeprecationWarning)
            logger.warning(DeprecatedVaultMissingPathMessage)
Ejemplo n.º 22
0
def initialize_execution(session: Session, experiment: Experiment,
                         journal: Journal) -> Optional[Response]:
    """
    Create the execution remotely by posting the journal as JSON.

    The journal is first updated with the experiment and the `running`
    status. When the execution is created (status code 200 or 201), its
    identifier is stored back into the experiment.

    Returns the response of the request, whatever its status code. Returns
    `None` when the experiment has no identifier or when the payload could
    not be built or sent.
    """
    experiment_id = get_experiment_id(experiment.get('extensions'))
    if not experiment_id:
        logger.info("Missing experiment identifier")
        return None

    journal["experiment"] = experiment
    journal["status"] = "running"

    experiment_url = urls.experiment(
        session.base_url, experiment_id=experiment_id)
    execution_url = urls.execution(experiment_url)

    try:
        with remove_sensitive_extension_values(
                journal["experiment"], ["experiment_path"]):
            payload = json.dumps(
                {"journal": journal}, ensure_ascii=False,
                default=json_encoder)

        r = session.post(
            execution_url, data=payload,
            headers={"content-type": "application/json"})
    except Exception:
        logger.debug("Failed to create execution", exc_info=True)
        return None

    if r.status_code in (200, 201):
        location = r.headers["Content-Location"]
        logger.info("Execution available at {}".format(urls.clean(location)))
        set_execution_id(r.json()["id"], experiment)
    else:
        content_type = r.headers.get("content-type", '')
        error = r.json() if 'application/json' in content_type else r.text
        logger.warning("Execution failed to be published: {}".format(error))

    return r
Ejemplo n.º 23
0
def ensure_verification_is_valid(experiment: Experiment):
    """
    Validate the experiment, then the verification it is expected to carry.

    Raises :exc:`InvalidVerification` unless the experiment has an
    extensions block holding exactly one extension named `chaosiq`, which
    itself has a `verification` block declaring an `id`, a
    `frequency-of-measurement` and a `duration-of-conditions`.
    """
    ensure_experiment_is_valid(experiment)

    extensions = experiment.get("extensions")
    if extensions is None:
        raise InvalidVerification(
            "a verification must have an extensions block")

    chaosiq_blocks = [
        extension for extension in extensions
        if extension.get("name", "") == "chaosiq"
    ]
    if len(chaosiq_blocks) != 1:
        raise InvalidVerification(
            "a verification must have a single chaosiq extension block")

    verification = chaosiq_blocks[0].get("verification")
    if verification is None:
        raise InvalidVerification(
            "a verification must have a verification block")

    # checked in that order, the first missing entry is the one reported
    required = (
        ("id", "a verification must have an id"),
        ("frequency-of-measurement",
         "a verification must have a frequency-of-measurement block"),
        ("duration-of-conditions",
         "a verification must have a duration-of-conditions block"),
    )
    for entry, message in required:
        if verification.get(entry) is None:
            raise InvalidVerification(message)

    logger.info("Verification looks valid")
Ejemplo n.º 24
0
def ensure_experiment_is_valid(experiment: Experiment):
    """
    Validate the syntax of the experiment and raise as soon as something is
    found to be wrong.

    The checks are made in the following order:

    * the experiment is not empty and has a title and a description
    * its tags, if any, are all non-empty strings
    * its extensions are valid
    * its configuration and secrets can be loaded
    * its steady-state hypothesis is valid
    * its method has at least one activity, each of them is valid and, when
      an activity is a reference, the activity it refers to can be found
    * its rollbacks are valid activities
    * its controls are valid

    Deprecated features in use are reported along the way.

    This function itself raises :exc:`InvalidExperiment` or
    :exc:`InvalidActivity`, the validators it relies on may raise as well.
    """
    logger.info("Validating the experiment's syntax")

    if not experiment:
        raise InvalidExperiment("an empty experiment is not an experiment")

    required = (
        ("title", "experiment requires a title"),
        ("description", "experiment requires a description"),
    )
    for entry, message in required:
        if not experiment.get(entry):
            raise InvalidExperiment(message)

    tags = experiment.get("tags")
    if tags and any(t == '' or not isinstance(t, str) for t in tags):
        raise InvalidExperiment(
            "experiment tags must be a non-empty string")

    validate_extensions(experiment)

    # loading them is enough to tell they are well-formed
    configuration = load_configuration(experiment.get("configuration", {}))
    load_secrets(experiment.get("secrets", {}), configuration)

    ensure_hypothesis_is_valid(experiment)

    method = experiment.get("method")
    if not method:
        raise InvalidExperiment("an experiment requires a method with "
                                "at least one activity")

    for activity in method:
        ensure_activity_is_valid(activity)

        # a reference must point to an activity found in the experiment
        ref = activity.get("ref")
        if not ref:
            continue

        if not lookup_activity(ref):
            raise InvalidActivity("referenced activity '{r}' could not be "
                                  "found in the experiment".format(r=ref))

    for rollback in experiment.get("rollbacks", []):
        ensure_activity_is_valid(rollback)

    warn_about_deprecated_features(experiment)

    validate_controls(experiment)

    logger.info("Experiment looks valid")
Ejemplo n.º 25
0
def hypothesis(experiment: Experiment) -> Hypothesis:
    """
    Return the steady-state hypothesis of the experiment, `None` when the
    experiment does not declare one.
    """
    declared = experiment.get("steady-state-hypothesis")
    return declared
Ejemplo n.º 26
0
def get_org_id(experiment: Experiment) -> str:
    """
    Return the `org_id` set on the `chaosiq` extension of the experiment.

    `None` is returned when the experiment has no `chaosiq` extension or when
    that extension has no `org_id`. Extensions without a name are skipped.
    """
    for extension in experiment.get("extensions", []):
        # do not assume a name: a nameless extension must not fail the lookup
        if extension.get("name") == "chaosiq":
            return extension.get("org_id")

    return None
Ejemplo n.º 27
0
def set_execution_id(execution_id: str, experiment: Experiment) -> None:
    """
    Store the execution identifier on the `chaosiq` extension of the
    experiment, under the `execution_id` key.

    Only the first `chaosiq` extension is updated, in place. Nothing happens
    when the experiment has no such extension. Extensions without a name are
    skipped.
    """
    for extension in experiment.get("extensions", []):
        # do not assume a name: a nameless extension must not fail the lookup
        if extension.get("name") == "chaosiq":
            extension["execution_id"] = execution_id
            break
Ejemplo n.º 28
0
def apply_activities(experiment: Experiment,
                     configuration: Configuration,
                     secrets: Secrets,
                     pool: ThreadPoolExecutor,
                     journal: Journal,
                     dry: bool = False) -> List[Run]:
    """
    Run the activities of the experiment's method, within the controls of
    the `method` level, and collect their results.

    Activities are consumed from `run_activities` until they are all done or
    the status of the journal becomes `aborted`, `failed` or `interrupted`.
    Whatever happens, the results gathered so far are set on the journal
    under the `run` key and passed as the state of the controls.

    A :exc:`SystemExit` is always re-raised. Its code decides whether the
    background activities are waited for before the results are collected.

    Returns the list of results, in the order the activities were started.
    """
    with controls(level="method",
                  experiment=experiment,
                  context=experiment,
                  configuration=configuration,
                  secrets=secrets) as control:
        result = []
        runs = []
        method = experiment.get("method", [])
        wait_for_background_activities = True

        try:
            for run in run_activities(experiment, configuration, secrets, pool,
                                      dry):
                runs.append(run)
                # stop starting new activities once the journal says so
                if journal["status"] in ["aborted", "failed", "interrupted"]:
                    break
        except SystemExit as x:
            # when we got a signal for an ungraceful exit, we can decide
            # not to wait for background activities. Their statuses will
            # remain failed.
            wait_for_background_activities = x.code != 30  # see exit.py
            raise
        finally:
            # no timeout means we block until a background activity is done
            background_activity_timeout = None

            if wait_for_background_activities and pool:
                logger.debug("Waiting for background activities to complete")
                pool.shutdown(wait=True)
            elif pool:
                harshly_terminate_pending_background_activities(pool)
                logger.debug(
                    "Do not wait for the background activities to finish "
                    "as per signal")
                # only give each pending result a very short time to show up
                background_activity_timeout = 0.2
                pool.shutdown(wait=False)

            for index, run in enumerate(runs):
                if not run:
                    continue

                # a dictionary is the run of an activity that was executed
                # in the foreground, anything else is expected to expose
                # `result()` as it came from the pool
                if isinstance(run, dict):
                    result.append(run)
                else:
                    try:
                        # background activities
                        result.append(
                            run.result(timeout=background_activity_timeout))
                    # NOTE(review): before Python 3.11, the timeout error
                    # raised by `concurrent.futures` is not the builtin
                    # `TimeoutError`. Confirm which one is in scope here.
                    except TimeoutError:
                        # we want an entry for the background activity in our
                        # results anyway, we won't have anything meaningful
                        # to say about it
                        result.append({
                            # assumes `runs` lines up with the method's
                            # activities, one run per activity
                            "activity": method[index],
                            "status": "failed",
                            "output": None,
                            "duration": None,
                            "start": None,
                            "end": None,
                            "exception": None
                        })

            # now let's ensure the journal has all activities in their correct
            # order (background ones included)
            journal["run"] = result

            control.with_state(result)

    return result
Ejemplo n.º 29
0
    def _run(
            self,
            strategy: Strategy,
            schedule: Schedule,  # noqa: C901
            experiment: Experiment,
            journal: Journal,
            configuration: Configuration,
            secrets: Secrets,
            settings: Settings,
            event_registry: EventHandlerRegistry) -> Journal:
        """
        Run the experiment from start to finish and return its journal.

        The sequence is as follows:

        * the title is substituted, the journal is initialized when none is
          given, the pools and the controls are set up
        * depending on the strategy, the steady-state hypothesis is run
          before, during and/or after the method
        * the rollbacks are run, unless an exit signal told us not to
        * the journal is completed with its end time, duration and final
          status, then passed to the controls of the experiment

        Interruptions (:exc:`InterruptExecution`, :exc:`KeyboardInterrupt`
        and :exc:`SystemExit`) raised by the hypothesis or the method are
        caught here and turn the status of the journal to `interrupted`.

        The controls are always cleaned up and the event registry is always
        notified that the run has finished.
        """
        experiment["title"] = substitute(experiment["title"], configuration,
                                         secrets)
        logger.info("Running experiment: {t}".format(t=experiment["title"]))

        started_at = time.time()
        journal = journal or initialize_run_journal(experiment)
        event_registry.started(experiment, journal)

        control = Control()
        activity_pool, rollback_pool = get_background_pools(experiment)
        hypo_pool = get_hypothesis_pool()
        # set once the method is done, see below
        continous_hypo_event = threading.Event()

        dry = experiment.get("dry", False)
        if dry:
            logger.warning("Dry mode enabled")

        initialize_global_controls(experiment, configuration, secrets,
                                   settings)
        initialize_controls(experiment, configuration, secrets)

        logger.info("Steady-state strategy: {}".format(strategy.value))
        rollback_strategy = settings.get("runtime",
                                         {}).get("rollbacks", {}).get(
                                             "strategy", "default")
        logger.info("Rollbacks strategy: {}".format(rollback_strategy))

        exit_gracefully_with_rollbacks = True
        with_ssh = has_steady_state_hypothesis_with_probes(experiment)
        if not with_ssh:
            logger.info("No steady state hypothesis defined. That's ok, just "
                        "exploring.")

        try:
            try:
                control.begin("experiment", experiment, experiment,
                              configuration, secrets)

                # a placeholder which is not `None`, so that the method is
                # run when the hypothesis is not checked beforehand
                state = object()
                if with_ssh and should_run_before_method(strategy):
                    state = run_gate_hypothesis(experiment, journal,
                                                configuration, secrets,
                                                event_registry, dry)

                # the method is skipped when the call above returned `None`
                if state is not None:
                    if with_ssh and should_run_during_method(strategy):
                        run_hypothesis_during_method(hypo_pool,
                                                     continous_hypo_event,
                                                     strategy, schedule,
                                                     experiment, journal,
                                                     configuration, secrets,
                                                     event_registry, dry)

                    state = run_method(strategy, activity_pool, experiment,
                                       journal, configuration, secrets,
                                       event_registry, dry)

                    # the method is over, flag it through the event
                    continous_hypo_event.set()
                    if journal["status"] not in ["interrupted", "aborted"]:
                        if with_ssh and (state is not None) and \
                                should_run_after_method(strategy):
                            run_deviation_validation_hypothesis(
                                experiment, journal, configuration, secrets,
                                event_registry, dry)
            except InterruptExecution as i:
                journal["status"] = "interrupted"
                logger.fatal(str(i))
                event_registry.interrupted(experiment, journal)
            except KeyboardInterrupt:
                journal["status"] = "interrupted"
                logger.warning("Received a termination signal (Ctrl-C)...")
                event_registry.signal_exit()
            except SystemExit as x:
                journal["status"] = "interrupted"
                logger.warning("Received the exit signal: {}".format(x.code))

                # an exit code of 30 means the rollbacks must be skipped
                exit_gracefully_with_rollbacks = x.code != 30
                if not exit_gracefully_with_rollbacks:
                    logger.warning("Ignoring rollbacks as per signal")
                event_registry.signal_exit()
            finally:
                # blocks until the work submitted to that pool is done
                hypo_pool.shutdown(wait=True)

            # just in case a signal overrode everything else to tell us not to
            # play them anyway (see the exit.py module)
            if exit_gracefully_with_rollbacks:
                run_rollback(rollback_strategy, rollback_pool, experiment,
                             journal, configuration, secrets, event_registry,
                             dry)

            journal["end"] = datetime.utcnow().isoformat()
            journal["duration"] = time.time() - started_at

            # the spec only allows these statuses, so if it's anything else
            # we override to "completed"
            if journal["status"] not in ("completed", "failed", "aborted",
                                         "interrupted"):
                journal["status"] = "completed"

            # a deviation takes over the status in what is logged, the status
            # kept in the journal is left as it is
            has_deviated = journal["deviated"]
            status = "deviated" if has_deviated else journal["status"]
            logger.info("Experiment ended with status: {s}".format(s=status))
            if has_deviated:
                logger.info(
                    "The steady-state has deviated, a weakness may have been "
                    "discovered")

            control.with_state(journal)
            try:
                control.end("experiment", experiment, experiment,
                            configuration, secrets)
            except ChaosException:
                # failing to close the controls does not fail the run
                logger.debug("Failed to close controls", exc_info=True)
        finally:
            try:
                cleanup_controls(experiment)
                cleanup_global_controls()
            finally:
                # notified even when cleaning up the controls failed
                event_registry.finish(journal)

        return journal
Ejemplo n.º 30
0
def run_experiment(experiment: Experiment,
                   settings: Settings = None) -> Journal:
    """
    Run the given `experiment` step by step, in the following sequence:
    steady-state hypothesis (before), method activities, steady-state
    hypothesis (after), then rollbacks.

    Activities can be executed in background when they have the
    `"background"` property set to `true`. In that case, the activity is run
    in a thread taken from the pools created by `get_background_pools`.

    If the experiment has the `"dry"` property set to a truthy value, dry
    mode is enabled and the `dry` flag is forwarded to the hypothesis,
    activities and rollbacks runners.

    The returned journal's `"status"` is set as follows:

    * `"failed"` when a steady-state hypothesis raised
      :exc:`ActivityFailed` or was not met (the `"deviated"` flag is also
      set when this happens after the method was applied)
    * `"aborted"` when applying the method raised an unexpected exception
    * `"interrupted"` when :exc:`InterruptExecution`,
      :exc:`KeyboardInterrupt` or :exc:`SystemExit` was caught
    * `"completed"` otherwise

    NOTE: Tricky to make a decision whether we should rollback when exiting
    abnormally (Ctrl-C, SIGTERM...). After all, there is a chance we actually
    cannot afford to rollback properly. Better bailing to a conservative
    approach. This means we swallow :exc:`KeyboardInterrupt` and
    :exc:`SystemExit` and do not bubble them back up to the caller. When we
    were interrupted, rollbacks are not applied and the journal status is
    set to `"interrupted"` to notify the caller this was indeed not
    terminated properly.

    :param experiment: the experiment description to run
    :param settings: optional settings; when `None`, the result of
        `get_loaded_settings()` is used instead
    :return: the journal of the run
    """
    logger.info("Running experiment: {t}".format(t=experiment["title"]))

    dry = experiment.get("dry", False)
    if dry:
        logger.warning("Dry mode enabled")

    started_at = time.time()
    settings = settings if settings is not None else get_loaded_settings()
    config = load_configuration(experiment.get("configuration", {}))
    secrets = load_secrets(experiment.get("secrets", {}), config)
    initialize_global_controls(experiment, config, secrets, settings)
    initialize_controls(experiment, config, secrets)
    activity_pool, rollback_pool = get_background_pools(experiment)

    control = Control()
    journal = initialize_run_journal(experiment)

    try:
        try:
            control.begin("experiment", experiment, experiment, config,
                          secrets)
            # this may fail the entire experiment right there if any of the
            # probes fail or fall out of their tolerance zone
            try:
                state = run_steady_state_hypothesis(experiment,
                                                    config,
                                                    secrets,
                                                    dry=dry)
                journal["steady_states"]["before"] = state
                if state is not None and not state["steady_state_met"]:
                    p = state["probes"][-1]
                    raise ActivityFailed(
                        "Steady state probe '{p}' is not in the given "
                        "tolerance so failing this experiment".format(
                            p=p["activity"]["name"]))
            except ActivityFailed as a:
                # do not touch `state` here: it is unbound when the
                # hypothesis runner itself raised, and when it is bound it
                # has already been recorded in the journal above
                journal["status"] = "failed"
                logger.fatal(str(a))
            else:
                try:
                    journal["run"] = apply_activities(experiment, config,
                                                      secrets, activity_pool,
                                                      dry)
                except Exception:
                    # any unexpected error while applying the method aborts
                    # the run; the closing hypothesis is skipped
                    journal["status"] = "aborted"
                    logger.fatal(
                        "Experiment ran into an un expected fatal error, "
                        "aborting now.",
                        exc_info=True)
                else:
                    try:
                        state = run_steady_state_hypothesis(experiment,
                                                            config,
                                                            secrets,
                                                            dry=dry)
                        journal["steady_states"]["after"] = state
                        if state is not None and not state["steady_state_met"]:
                            # the hypothesis held before the method but not
                            # after it: flag the deviation
                            journal["deviated"] = True
                            p = state["probes"][-1]
                            raise ActivityFailed(
                                "Steady state probe '{p}' is not in the given "
                                "tolerance so failing this experiment".format(
                                    p=p["activity"]["name"]))
                    except ActivityFailed as a:
                        journal["status"] = "failed"
                        logger.fatal(str(a))
        except InterruptExecution as i:
            journal["status"] = "interrupted"
            logger.fatal(str(i))
        except (KeyboardInterrupt, SystemExit):
            journal["status"] = "interrupted"
            logger.warning("Received an exit signal, "
                           "leaving without applying rollbacks.")
        else:
            # rollbacks are applied whenever we were not interrupted, even
            # if the experiment failed or was aborted
            journal["status"] = journal["status"] or "completed"
            journal["rollbacks"] = apply_rollbacks(experiment, config, secrets,
                                                   rollback_pool, dry)

        journal["end"] = datetime.utcnow().isoformat()
        journal["duration"] = time.time() - started_at

        has_deviated = journal["deviated"]
        status = "deviated" if has_deviated else journal["status"]

        logger.info("Experiment ended with status: {s}".format(s=status))

        if has_deviated:
            logger.info(
                "The steady-state has deviated, a weakness may have been "
                "discovered")

        control.with_state(journal)

        try:
            control.end("experiment", experiment, experiment, config, secrets)
        except ChaosException:
            # failing to close controls must not fail the run
            logger.debug("Failed to close controls", exc_info=True)

    finally:
        # always release controls, whatever the outcome of the run
        cleanup_controls(experiment)
        cleanup_global_controls()

    return journal