Beispiel #1
0
def load_experiment(experiment_source: str,
                    settings: Settings = None,
                    verify_tls: bool = True) -> Experiment:
    """
    Load an experiment from the given source.

    The source may be a local file or an HTTP(s) URL. If the endpoint requires
    authentication, please set the appropriate entry in the settings file,
    under the `auths:` section, keyed by domain (the network location of the
    URL, port included when there is one). For instance:

    ```yaml
    auths:
      mydomain.com:
        type: basic
        value: XYZ
      otherdomain.com:
        type: bearer
        value: UIY
      localhost:8081:
        type: digest
        value: UIY
    ```

    Set `verify_tls` to `False` if the source is an HTTP endpoint served over
    a self-signed certificate, to instruct the loader to not verify the
    certificates.

    Returns the parsed experiment. Raises `InvalidSource` when the source is
    neither an existing path nor an http/https URL, or when the endpoint
    does not reply with a 200 status code.
    """
    with controls(level="loader", context=experiment_source) as control:
        if os.path.exists(experiment_source):
            parsed = parse_experiment_from_file(experiment_source)
            control.with_state(parsed)
            return parsed

        p = urlparse(experiment_source)
        if not p.scheme and not os.path.exists(p.path):
            raise InvalidSource('Path "{}" does not exist.'.format(p.path))

        if p.scheme not in ("http", "https"):
            raise InvalidSource(
                "'{}' is not a supported source scheme.".format(p.scheme))

        headers = {"Accept": "application/json, application/x-yaml"}
        if settings:
            # `auths` is a mapping keyed by domain; tolerate a missing or
            # null entry instead of failing while iterating over None
            auths = settings.get("auths") or {}
            auth = auths.get(p.netloc)
            if auth is not None:
                headers["Authorization"] = "{} {}".format(
                    auth["type"], auth["value"])

        r = requests.get(experiment_source, headers=headers, verify=verify_tls)
        if r.status_code != 200:
            raise InvalidSource("Failed to fetch the experiment: {}".format(
                r.text))

        logger.debug("Fetched experiment: \n{}".format(r.text))
        parsed = parse_experiment_from_http(r)
        control.with_state(parsed)
        return parsed
Beispiel #2
0
def apply_rollbacks(experiment: Experiment,
                    configuration: Configuration,
                    secrets: Secrets,
                    pool: ThreadPoolExecutor,
                    dry: bool = False) -> List[Run]:
    """
    Run the rollbacks of the experiment within the "rollback" controls and
    return what they produced.

    Entries yielded by `run_rollbacks` that are dicts are kept as they are,
    any other non-empty entry is resolved through its `result()` method once
    the pool, when one is given, has been shut down. Empty entries are
    dropped. The final list is handed to the control as its state.
    """
    logger.info("Let's rollback...")
    with controls(level="rollback", experiment=experiment,
                  context=experiment, configuration=configuration,
                  secrets=secrets) as control:
        # consume the whole generator before waiting on the pool
        pending = run_rollbacks(experiment, configuration, secrets, pool, dry)
        pending = list(pending)

        if pool:
            logger.debug("Waiting for background rollbacks to complete...")
            pool.shutdown(wait=True)

        result = []
        result.extend(
            entry if isinstance(entry, dict) else entry.result()
            for entry in pending
            if entry
        )

        control.with_state(result)

    return result
Beispiel #3
0
def apply_activities(experiment: Experiment,
                     configuration: Configuration,
                     secrets: Secrets,
                     pool: ThreadPoolExecutor,
                     dry: bool = False) -> List[Run]:
    """
    Run the activities of the experiment within the "method" controls and
    return what they produced.

    Entries yielded by `run_activities` that are dicts are kept as they are,
    any other non-empty entry is resolved through its `result()` method once
    the pool, when one is given, has been shut down. Empty entries are
    dropped. The final list is handed to the control as its state.
    """
    with controls(level="method", experiment=experiment,
                  context=experiment, configuration=configuration,
                  secrets=secrets) as control:
        # consume the whole generator before waiting on the pool
        pending = run_activities(experiment, configuration, secrets, pool, dry)
        pending = list(pending)

        if pool:
            logger.debug("Waiting for background activities to complete...")
            pool.shutdown(wait=True)

        result = []
        result.extend(
            entry if isinstance(entry, dict) else entry.result()
            for entry in pending
            if entry
        )

        control.with_state(result)

    return result
Beispiel #4
0
def run_steady_state_hypothesis(experiment: Experiment,
                                configuration: Configuration, secrets: Secrets,
                                dry: bool = False):
    """
    Run all probes in the hypothesis and fail the experiment as soon as any of
    the probe fails or is outside the tolerance zone.

    Returns `None` when the experiment declares no steady state hypothesis.
    Otherwise returns a dict with two keys: `"steady_state_met"`, a boolean
    telling whether every probe ran and was within its tolerance, and
    `"probes"`, the list of runs of the probes executed so far. Each run
    gets a `"tolerance_met"` flag. When `dry` is set, tolerances are not
    checked and every probe that did not fail is flagged as met.
    """
    state = {
        "steady_state_met": None,
        "probes": []
    }
    hypo = experiment.get("steady-state-hypothesis")
    if not hypo:
        logger.info(
            "No steady state hypothesis defined. That's ok, just exploring.")
        return

    logger.info("Steady state hypothesis: {h}".format(h=hypo.get("title")))

    with controls(level="hypothesis", experiment=experiment, context=hypo,
                  configuration=configuration, secrets=secrets) as control:
        probes = hypo.get("probes", [])
        # the control receives the state object up front; it is the same
        # dict that gets filled in below
        control.with_state(state)

        for activity in probes:
            run = execute_activity(
                experiment=experiment, activity=activity,
                configuration=configuration, secrets=secrets, dry=dry)

            state["probes"].append(run)

            if run["status"] == "failed":
                run["tolerance_met"] = False
                state["steady_state_met"] = False
                # `warn` is a deprecated alias of `warning`
                logger.warning("Probe terminated unexpectedly, "
                               "so its tolerance could not be validated")
                return state

            run["tolerance_met"] = True

            if dry:
                # do not check for tolerance when dry mode is on
                continue

            tolerance = activity.get("tolerance")
            logger.debug("allowed tolerance is {t}".format(t=str(tolerance)))
            checked = within_tolerance(
                tolerance, run["output"], configuration=configuration,
                secrets=secrets)
            if not checked:
                run["tolerance_met"] = False
                state["steady_state_met"] = False
                return state

        state["steady_state_met"] = True
        logger.info("Steady state hypothesis is met!")

    return state
Beispiel #5
0
def test_controls_are_applied_before_and_but_not_after_experiment():
    """
    With the first control scoped to "before", the before-experiment marker
    must be set and the after-experiment marker must stay absent.
    """
    experiment = deepcopy(experiments.ExperimentWithControls)
    first_control = experiment["controls"][0]
    first_control["scope"] = "before"

    with controls("experiment", experiment, context=experiment):
        assert "before_experiment_control" in experiment
        assert experiment["before_experiment_control"] is True

        experiment["dry"] = True
        run_experiment(experiment)

    assert "after_experiment_control" not in experiment
Beispiel #6
0
def test_no_controls_get_applied_when_none_defined():
    """
    An experiment without controls must get neither the before nor the
    after experiment marker.
    """
    experiment = deepcopy(experiments.ExperimentWithoutControls)
    experiment["dry"] = True

    with controls("experiment", experiment, context=experiment):
        assert "before_experiment_control" not in experiment

        # set again right before the run, as the original test does
        experiment["dry"] = True
        run_experiment(experiment)

    assert "after_experiment_control" not in experiment
Beispiel #7
0
def test_controls_are_applied_before_and_after_experiment():
    """
    The before marker must be set on entering the "experiment" controls,
    the after marker must be set on the experiment and on the journal once
    they exit.
    """
    experiment = deepcopy(experiments.ExperimentWithControls)

    with controls("experiment", experiment, context=experiment):
        assert "before_experiment_control" in experiment
        assert experiment["before_experiment_control"] is True

        experiment["dry"] = True
        journal = run_experiment(experiment)

    assert "after_experiment_control" in experiment
    assert experiment["after_experiment_control"] is True
    assert journal["after_experiment_control"] is True
def test_controls_are_applied_not_before_and_but_after_experiment():
    """
    With the first control scoped to "after", the before-experiment marker
    must stay absent while the after-experiment marker must be set on the
    experiment and on the journal.
    """
    experiment = deepcopy(experiments.ExperimentWithControls)
    first_control = experiment["controls"][0]
    first_control["scope"] = "after"

    with controls("experiment", experiment, context=experiment):
        assert "before_experiment_control" not in experiment

        experiment["dry"] = Dry.ACTIVITIES
        journal = run_experiment(experiment)

    assert "after_experiment_control" in experiment
    assert experiment["after_experiment_control"] is True
    assert journal["after_experiment_control"] is True
def test_controls_are_applied_before_and_after_rollbacks():
    """
    The before marker must be set on entering the "rollback" controls, the
    after marker must be set on the experiment and present in the journal's
    rollbacks once they exit.
    """
    experiment = deepcopy(experiments.ExperimentWithControls)

    with controls("rollback", experiment, context=experiment):
        assert "before_rollback_control" in experiment
        assert experiment["before_rollback_control"] is True

        experiment["dry"] = Dry.ACTIVITIES
        journal = run_experiment(experiment)

    assert "after_rollback_control" in experiment
    assert experiment["after_rollback_control"] is True
    assert "after_rollback_control" in journal["rollbacks"]
Beispiel #10
0
def test_controls_are_applied_before_and_after_hypothesis():
    """
    The before marker must be set on the hypothesis on entering the
    "hypothesis" controls, the after marker must be set on the hypothesis
    and on the journal's "before" steady state once they exit.
    """
    experiment = deepcopy(experiments.ExperimentWithControls)
    hypothesis = experiment["steady-state-hypothesis"]

    with controls("hypothesis", experiment, context=hypothesis):
        assert "before_hypothesis_control" in hypothesis
        assert hypothesis["before_hypothesis_control"] is True

        experiment["dry"] = True
        journal = run_experiment(experiment)

    assert "after_hypothesis_control" in hypothesis
    assert hypothesis["after_hypothesis_control"] is True
    steady_state_before = journal["steady_states"]["before"]
    assert steady_state_before["after_hypothesis_control"] is True
Beispiel #11
0
def test_controls_are_applied_before_and_after_activities():
    """
    For every activity of the experiment, the before marker must be set on
    entering the "activity" controls and the after marker must be set on
    both the activity and its run once the activity was executed.
    """
    experiment = deepcopy(experiments.ExperimentWithControls)
    experiment["dry"] = True

    for activity in get_all_activities(experiment):
        with controls("activity", experiment, context=activity):
            assert activity["before_activity_control"] is True

            run = execute_activity(
                experiment, activity, None, None, dry=False)

            assert "after_activity_control" in activity
            assert activity["after_activity_control"] is True
            assert run["after_activity_control"] is True
Beispiel #12
0
def execute_activity(experiment: Experiment, probe: Probe,
                     configuration: Configuration, secrets: Secrets) -> Run:
    """
    Call the activity provider for the given probe and record how the call
    went: its output, its status ("succeeded" or "failed"), the formatted
    exception on failure, and its start, end and duration.

    When the probe is a reference (it has a `ref` entry), the referenced
    activity is looked up and run instead; `ActivityFailed` is raised when
    it cannot be found. Pauses declared under `pauses` are slept before and
    after the call.
    """
    reference = probe.get("ref")
    if reference:
        probe = lookup_activity(reference)
        if not probe:
            raise ActivityFailed(
                "could not find referenced activity '{r}'".format(
                    r=reference))

    with controls(level="activity", experiment=experiment, context=probe,
                  configuration=configuration, secrets=secrets) as control:
        pauses = probe.get("pauses", {})
        delay_before = pauses.get("before")
        if delay_before:
            time.sleep(delay_before)

        started_at = datetime.utcnow()

        # snapshot of the activity taken before it gets called
        run = {"activity": probe.copy(), "output": None}

        output = None
        try:
            output = run_activity(probe, configuration, secrets)
            run.update(output=output, status="succeeded")
        except ActivityFailed as failure:
            run.update(
                status="failed",
                output=output,
                exception=traceback.format_exception(
                    type(failure), failure, None))
        finally:
            # timings are taken before the trailing pause
            ended_at = datetime.utcnow()
            run.update(
                start=started_at.isoformat(),
                end=ended_at.isoformat(),
                duration=(ended_at - started_at).total_seconds())

            delay_after = pauses.get("after")
            if delay_after:
                time.sleep(delay_after)

        control.with_state(run)

    return run
Beispiel #13
0
def execute_activity(experiment: Experiment, activity: Activity,
                     configuration: Configuration,
                     secrets: Secrets, dry: bool = False) -> Run:
    """
    Call the activity provider for the given activity and record how the
    call went: its output, its status ("succeeded" or "failed"), the
    formatted exception on failure, and its start, end and duration.

    When the activity is a reference (it has a `ref` entry), the referenced
    activity is looked up and run instead; `ActivityFailed` is raised when
    it cannot be found. In dry mode neither the activity nor its pauses are
    actually run, yet the run is still reported as succeeded.
    """
    reference = activity.get("ref")
    if reference:
        activity = lookup_activity(reference)
        if not activity:
            raise ActivityFailed(
                "could not find referenced activity '{r}'".format(
                    r=reference))

    with controls(level="activity", experiment=experiment, context=activity,
                  configuration=configuration, secrets=secrets) as control:
        pauses = activity.get("pauses", {})
        delay_before = pauses.get("before")
        if delay_before:
            logger.info("Pausing before next activity for {d}s...".format(
                d=delay_before))
            # the pause is announced but skipped in dry-mode
            if not dry:
                time.sleep(delay_before)

        kind = activity["type"].title()
        name = activity.get("name")
        if activity.get("background"):
            logger.info("{t}: {n} [in background]".format(t=kind, n=name))
        else:
            logger.info("{t}: {n}".format(t=kind, n=name))

        started_at = datetime.utcnow()

        # snapshot of the activity taken before it gets called
        run = {"activity": activity.copy(), "output": None}

        output = None
        # NOTE(review): nothing in this function ever sets this flag
        interrupted = False
        try:
            # the activity itself is skipped in dry-mode
            if not dry:
                output = run_activity(activity, configuration, secrets)
            run.update(output=output, status="succeeded")
            if output is None:
                logger.debug("  => succeeded without any result value")
            else:
                logger.debug("  => succeeded with '{r}'".format(r=output))
        except ActivityFailed as failure:
            error_msg = str(failure)
            run.update(
                status="failed",
                output=output,
                exception=traceback.format_exception(
                    type(failure), failure, None))
            logger.error("  => failed: {x}".format(x=error_msg))
        finally:
            # timings are taken before the trailing pause
            ended_at = datetime.utcnow()
            run.update(
                start=started_at.isoformat(),
                end=ended_at.isoformat(),
                duration=(ended_at - started_at).total_seconds())

            delay_after = pauses.get("after")
            if delay_after and not interrupted:
                logger.info("Pausing after activity for {d}s...".format(
                    d=delay_after))
                # the pause is announced but skipped in dry-mode
                if not dry:
                    time.sleep(delay_after)

        control.with_state(run)

    return run
Beispiel #14
0
def test_controls_may_interrupt_experiment():
    """
    Running the interruptible experiment fixture must produce a journal
    whose status is "interrupted".
    """
    experiment = deepcopy(experiments.ExperimentCanBeInterruptedByControl)

    with controls("experiment", experiment, context=experiment):
        experiment["dry"] = True
        journal = run_experiment(experiment)
        status = journal["status"]
        assert status == "interrupted"
Beispiel #15
0
def apply_activities(experiment: Experiment,
                     configuration: Configuration,
                     secrets: Secrets,
                     pool: ThreadPoolExecutor,
                     journal: Journal,
                     dry: bool = False) -> List[Run]:
    """
    Run the activities of the experiment within the "method" controls,
    collect their runs, store them in the journal under `"run"` and return
    them.

    Iteration over `run_activities` stops as soon as the journal status is
    one of "aborted", "failed" or "interrupted". A `SystemExit` raised while
    iterating is re-raised, after the collected runs were still gathered in
    the `finally` clause; its exit code decides whether background
    activities are waited for.

    Runs that are dicts are used as they are. Any other non-empty run is
    resolved through its `result()` method; when that times out, a
    placeholder entry with a "failed" status is recorded in its place.
    """
    with controls(level="method",
                  experiment=experiment,
                  context=experiment,
                  configuration=configuration,
                  secrets=secrets) as control:
        result = []
        runs = []
        method = experiment.get("method", [])
        wait_for_background_activities = True

        try:
            for run in run_activities(experiment, configuration, secrets, pool,
                                      dry):
                runs.append(run)
                # stop scheduling further activities once the journal has
                # been flagged with a terminal status
                if journal["status"] in ["aborted", "failed", "interrupted"]:
                    break
        except SystemExit as x:
            # when we got a signal for an ungraceful exit, we can decide
            # not to wait for background activities. Their statuses will
            # remain failed.
            wait_for_background_activities = x.code != 30  # see exit.py
            raise
        finally:
            # None means result() below blocks until the run is available
            background_activity_timeout = None

            if wait_for_background_activities and pool:
                logger.debug("Waiting for background activities to complete")
                pool.shutdown(wait=True)
            elif pool:
                harshly_terminate_pending_background_activities(pool)
                logger.debug(
                    "Do not wait for the background activities to finish "
                    "as per signal")
                # only give each pending run a short time to hand over its
                # result since the pool is not waited for
                background_activity_timeout = 0.2
                pool.shutdown(wait=False)

            for index, run in enumerate(runs):
                if not run:
                    continue

                if isinstance(run, dict):
                    result.append(run)
                else:
                    try:
                        # background activities
                        result.append(
                            run.result(timeout=background_activity_timeout))
                    # NOTE(review): before Python 3.11 the TimeoutError of
                    # concurrent.futures was not the builtin one - confirm
                    # which name is in scope in this module
                    except TimeoutError:
                        # we want an entry for the background activity in our
                        # results anyway, we won't have anything meaningful
                        # to say about it
                        # NOTE(review): assumes the position of a run in
                        # `runs` matches the position of its activity in the
                        # experiment's method - TODO confirm
                        result.append({
                            "activity": method[index],
                            "status": "failed",
                            "output": None,
                            "duration": None,
                            "start": None,
                            "end": None,
                            "exception": None
                        })

            # now let's ensure the journal has all activities in their correct
            # order (background ones included)
            journal["run"] = result

            control.with_state(result)

    return result
Beispiel #16
0
def execute_activity(
    experiment: Experiment,
    activity: Activity,
    configuration: Configuration,
    secrets: Secrets,
    dry: Dry,
) -> Run:
    """
    Call the activity provider for the given activity and record how the
    call went: its output, its status ("succeeded" or "failed"), the
    formatted exception on failure, and its start, end and duration.

    When the activity is a reference (it has a `ref` entry), the referenced
    activity is looked up and run instead; `ActivityFailed` is raised when
    it cannot be found.

    The `dry` strategy, which the activity may override with its own `dry`
    entry, decides what is skipped: `Dry.ACTIONS` skips actions,
    `Dry.PROBES` skips probes and `Dry.ACTIVITIES` skips every activity. A
    skipped activity is still reported as succeeded. Pauses are skipped for
    skipped activities and when the strategy is `Dry.PAUSE`.
    """
    reference = activity.get("ref")
    if reference:
        activity = lookup_activity(reference)
        if not activity:
            raise ActivityFailed(
                f"could not find referenced activity '{reference}'")

    with controls(level="activity", experiment=experiment, context=activity,
                  configuration=configuration, secrets=secrets) as control:
        # the activity's own "dry" entry takes precedence over the argument
        dry = activity.get("dry", dry)
        pauses = substitute(
            activity.get("pauses", {}), configuration, secrets)
        pause_before = pauses.get("before")

        activity_type = activity["type"]
        if dry == Dry.ACTIONS:
            is_dry = activity_type == "action"
        elif dry == Dry.PROBES:
            is_dry = activity_type == "probe"
        elif dry == Dry.ACTIVITIES:
            is_dry = True
        else:
            is_dry = False

        # pauses are announced in the logs but only slept when neither the
        # pauses nor this activity are skipped
        may_sleep = dry != Dry.PAUSE and not is_dry

        if pause_before:
            logger.info(f"Pausing before next activity for {pause_before}s...")
            if may_sleep:
                time.sleep(pause_before)

        kind = activity["type"].title()
        name = activity.get("name")
        if activity.get("background"):
            logger.info("{t}: {n} [in background]".format(t=kind, n=name))
        else:
            logger.info("{t}: {n}".format(t=kind, n=name))

        started_at = datetime.utcnow()

        # snapshot of the activity taken before it gets called
        run = {"activity": activity.copy(), "output": None}

        output = None
        # NOTE(review): nothing in this function ever sets this flag
        interrupted = False
        try:
            # the activity itself is not called when it is skipped
            if not is_dry:
                output = run_activity(activity, configuration, secrets)
            run.update(output=output, status="succeeded")
            if output is None:
                logger.debug("  => succeeded without any result value")
            else:
                logger.debug(f"  => succeeded with '{output}'")
        except ActivityFailed as failure:
            error_msg = str(failure)
            run.update(
                status="failed",
                output=output,
                exception=traceback.format_exception(
                    type(failure), failure, None))
            logger.error(f"  => failed: {error_msg}")
        finally:
            # timings are taken before the trailing pause
            ended_at = datetime.utcnow()
            run.update(
                start=started_at.isoformat(),
                end=ended_at.isoformat(),
                duration=(ended_at - started_at).total_seconds())

            pause_after = pauses.get("after")
            if pause_after and not interrupted:
                logger.info(f"Pausing after activity for {pause_after}s...")
                if may_sleep:
                    time.sleep(pause_after)

        control.with_state(run)

    return run