예제 #1
0
def run_deviation_validation_hypothesis(experiment: Experiment,
                                        journal: Journal,
                                        configuration: Configuration,
                                        secrets: Secrets,
                                        event_registry: EventHandlerRegistry,
                                        dry: bool = False) \
                                            -> Dict[str, Any]:
    """
    Evaluate the steady-state hypothesis once the method has been applied.

    The event registry is notified before and after the evaluation, and the
    resulting state is stored under `journal["steady_states"]["after"]`.
    When the state reports that the steady state was not met, the journal is
    flagged as deviated, its status is set to `"failed"` and the name of the
    last probe of the state is logged.

    Returns the state produced by `run_steady_state_hypothesis`, whatever
    the outcome.
    """
    logger.debug("Running steady-state hypothesis after the method")
    event_registry.start_hypothesis_after(experiment)

    after_state = run_steady_state_hypothesis(
        experiment, configuration, secrets, dry=dry)
    journal["steady_states"]["after"] = after_state
    event_registry.hypothesis_after_completed(
        experiment, after_state, journal)

    # nothing more to record when there is no state or the hypothesis held
    if after_state is None or after_state["steady_state_met"]:
        return after_state

    journal["deviated"] = True
    journal["status"] = "failed"

    last_probe = after_state["probes"][-1]
    probe_name = last_probe["activity"]["name"]
    logger.fatal(
        "Steady state probe '{p}' is not in the given tolerance so failing "
        "this experiment".format(p=probe_name))
    return after_state
예제 #2
0
def run_gate_hypothesis(experiment: Experiment,
                        journal: Journal,
                        configuration: Configuration,
                        secrets: Secrets,
                        event_registry: EventHandlerRegistry,
                        dry: bool = False) -> Dict[str, Any]:
    """
    Evaluate the steady-state hypothesis ahead of the method.

    The event registry is notified before and after the evaluation, and the
    resulting state is stored under `journal["steady_states"]["before"]`.

    Returns the state when there is none or when the steady state was met.
    When the steady state was not met, the journal status is set to
    `"failed"`, the name of the last probe of the state is logged and `None`
    is returned instead of the state.
    """
    logger.debug("Running steady-state hypothesis before the method")
    event_registry.start_hypothesis_before(experiment)

    before_state = run_steady_state_hypothesis(
        experiment, configuration, secrets, dry=dry)
    journal["steady_states"]["before"] = before_state
    event_registry.hypothesis_before_completed(
        experiment, before_state, journal)

    gate_passed = before_state is None or before_state["steady_state_met"]
    if gate_passed:
        return before_state

    # NOTE(review): this repeats the assignment made before the handlers
    # were notified; kept as-is to preserve the existing behavior
    journal["steady_states"]["before"] = before_state
    journal["status"] = "failed"

    failing_probe = before_state["probes"][-1]
    logger.fatal(
        "Steady state probe '{p}' is not in the given tolerance so failing "
        "this experiment".format(p=failing_probe["activity"]["name"]))
    return None
예제 #3
0
def run_hypothesis_continuously(
    event: threading.Event,
    schedule: Schedule,
    experiment: Experiment,
    journal: Journal,
    configuration: Configuration,
    secrets: Secrets,
    event_registry: EventHandlerRegistry,
    dry: bool = False,
):
    """
    Evaluate the steady-state hypothesis repeatedly until told to stop.

    The loop ends when `event` is set, when the journal status is already
    one of `"failed"`, `"interrupted"` or `"aborted"`, or when fail-fast
    triggers. Every state is appended to
    `journal["steady_states"]["during"]` and reported to the event registry.

    When `schedule.fail_fast` is set and the percentage of deviated
    iterations reaches `schedule.fail_fast_ratio`, the journal status is set
    to `"failed"` and the loop stops.
    """
    frequency = schedule.continuous_hypothesis_frequency
    fail_fast_ratio = schedule.fail_fast_ratio

    event_registry.start_continuous_hypothesis(frequency)
    logger.info(
        "Executing the steady-state hypothesis continuously "
        "every {} seconds".format(frequency)
    )

    terminal_statuses = ("failed", "interrupted", "aborted")
    deviation_count = 0
    iteration = 1
    while not event.is_set():
        # the run was already terminated elsewhere, stop right away
        if journal["status"] in terminal_statuses:
            break

        state = run_steady_state_hypothesis(
            experiment, configuration, secrets, dry=dry
        )
        journal["steady_states"]["during"].append(state)
        event_registry.continuous_hypothesis_iteration(iteration, state)

        deviated = state is not None and not state["steady_state_met"]
        if deviated:
            deviation_count += 1
            # percentage of iterations so far that were out of tolerance
            failed_ratio = (deviation_count * 100) / iteration
            last_probe = state["probes"][-1]
            logger.warning(
                "Continuous steady state probe '{p}' is not in the given "
                "tolerance".format(p=last_probe["activity"]["name"])
            )

            if schedule.fail_fast and failed_ratio >= fail_fast_ratio:
                message = "Terminating immediately the experiment"
                if failed_ratio != 0.0:
                    message = "{} after {:.1f}% hypothesis deviated".format(
                        message, failed_ratio
                    )
                logger.info(message)
                journal["status"] = "failed"
                break
        iteration += 1

        # the pause is not shortened by the time the probes took: the
        # frequency is the gap between two iterations, not the total
        # duration of one iteration
        event.wait(timeout=frequency)
예제 #4
0
def run_experiment(experiment: Experiment,
                   settings: Settings = None) -> Journal:
    """
    Run the given `experiment` method step by step, in the following sequence:
    steady probe, action, close probe.

    Activities can be executed in background when they have the
    `"background"` property set to `true`. In that case, the activity is run in
    a thread. By the end of runs, those threads block until they are all
    complete.

    If the experiment has the `"dry"` property set to `True`, the experiment
    runs without actually executing the activities.

    The returned journal carries the steady states measured before and after
    the method, the activity runs, the rollbacks, the end time, the duration
    and a status. The status is `"failed"` when a steady-state hypothesis is
    not met, `"aborted"` when applying the activities raised an unexpected
    exception, `"interrupted"` when the execution was interrupted, and
    otherwise whatever the journal already held or `"completed"`.

    NOTE: Tricky to make a decision whether we should rollback when exiting
    abnormally (Ctrl-C, SIGTERM...). After all, there is a chance we actually
    cannot afford to rollback properly. Better bailing to a conservative
    approach. This means we swallow :exc:`KeyboardInterrupt` and
    :exc:`SystemExit` and do not bubble it back up to the caller. When we were
    interrupted, we set the journal status to `"interrupted"` to notify the
    caller this was indeed not terminated properly, and rollbacks are not
    applied.
    """
    logger.info("Running experiment: {t}".format(t=experiment["title"]))

    dry = experiment.get("dry", False)
    if dry:
        logger.warning("Dry mode enabled")

    started_at = time.time()
    settings = settings if settings is not None else get_loaded_settings()
    config = load_configuration(experiment.get("configuration", {}))
    secrets = load_secrets(experiment.get("secrets", {}), config)
    initialize_global_controls(experiment, config, secrets, settings)
    initialize_controls(experiment, config, secrets)
    activity_pool, rollback_pool = get_background_pools(experiment)

    control = Control()
    journal = initialize_run_journal(experiment)

    try:
        try:
            control.begin("experiment", experiment, experiment, config,
                          secrets)
            # bound up front so the handler below can always read it, even
            # when the hypothesis call itself raises before assigning it
            state = None
            # this may fail the entire experiment right there if any of the
            # probes fail or fall out of their tolerance zone
            try:
                state = run_steady_state_hypothesis(experiment,
                                                    config,
                                                    secrets,
                                                    dry=dry)
                journal["steady_states"]["before"] = state
                if state is not None and not state["steady_state_met"]:
                    p = state["probes"][-1]
                    raise ActivityFailed(
                        "Steady state probe '{p}' is not in the given "
                        "tolerance so failing this experiment".format(
                            p=p["activity"]["name"]))
            except ActivityFailed as a:
                journal["steady_states"]["before"] = state
                journal["status"] = "failed"
                logger.fatal(str(a))
            else:
                # the method only runs when the hypothesis held beforehand
                try:
                    journal["run"] = apply_activities(experiment, config,
                                                      secrets, activity_pool,
                                                      dry)
                except Exception:
                    journal["status"] = "aborted"
                    logger.fatal(
                        "Experiment ran into an un expected fatal error, "
                        "aborting now.",
                        exc_info=True)
                else:
                    # check the hypothesis again to detect a deviation
                    try:
                        state = run_steady_state_hypothesis(experiment,
                                                            config,
                                                            secrets,
                                                            dry=dry)
                        journal["steady_states"]["after"] = state
                        if state is not None and not state["steady_state_met"]:
                            journal["deviated"] = True
                            p = state["probes"][-1]
                            raise ActivityFailed(
                                "Steady state probe '{p}' is not in the given "
                                "tolerance so failing this experiment".format(
                                    p=p["activity"]["name"]))
                    except ActivityFailed as a:
                        journal["status"] = "failed"
                        logger.fatal(str(a))
        except InterruptExecution as i:
            journal["status"] = "interrupted"
            logger.fatal(str(i))
        except (KeyboardInterrupt, SystemExit):
            journal["status"] = "interrupted"
            # `Logger.warn` is a deprecated alias of `Logger.warning`
            logger.warning("Received an exit signal, "
                           "leaving without applying rollbacks.")
        else:
            # rollbacks are applied only when the run was not interrupted
            journal["status"] = journal["status"] or "completed"
            journal["rollbacks"] = apply_rollbacks(experiment, config, secrets,
                                                   rollback_pool, dry)

        journal["end"] = datetime.utcnow().isoformat()
        journal["duration"] = time.time() - started_at

        has_deviated = journal["deviated"]
        # a deviation takes precedence over the journal status in the log
        status = "deviated" if has_deviated else journal["status"]

        logger.info("Experiment ended with status: {s}".format(s=status))

        if has_deviated:
            logger.info(
                "The steady-state has deviated, a weakness may have been "
                "discovered")

        control.with_state(journal)

        try:
            control.end("experiment", experiment, experiment, config, secrets)
        except ChaosException:
            # failing to close the controls must not fail the run itself
            logger.debug("Failed to close controls", exc_info=True)

    finally:
        cleanup_controls(experiment)
        cleanup_global_controls()

    return journal