def load_experiment(experiment_source: str, settings: Settings = None,
                    verify_tls: bool = True) -> Experiment:
    """
    Load an experiment from the given source.

    The source may be a local file or a HTTP(s) URL. If the endpoint requires
    authentication, please set the appropriate entry in the settings file,
    under the `auths:` section, keyed by domain. For instance:

    ```yaml
    auths:
      mydomain.com:
        type: basic
        value: XYZ
      otherdomain.com:
        type: bearer
        value: UIY
      localhost:8081:
        type: digest
        value: UIY
    ```

    Set `verify_tls` to `False` if the source is served over an HTTP endpoint
    using a self-signed certificate, to instruct the loader to not verify the
    certificates.

    Returns the parsed experiment. Raises `InvalidSource` when the source is
    neither an existing path nor a `http`/`https` URL, or when the HTTP
    response status is not 200.
    """
    with controls(level="loader", context=experiment_source) as control:
        # local files take precedence over any URL interpretation
        if os.path.exists(experiment_source):
            parsed = parse_experiment_from_file(experiment_source)
            control.with_state(parsed)
            return parsed

        p = urlparse(experiment_source)
        if not p.scheme and not os.path.exists(p.path):
            raise InvalidSource('Path "{}" does not exist.'.format(p.path))

        if p.scheme not in ("http", "https"):
            raise InvalidSource(
                "'{}' is not a supported source scheme.".format(p.scheme))

        headers = {"Accept": "application/json, application/x-yaml"}
        if settings:
            # an `auths:` key that is present but empty yields None, so fall
            # back to an empty mapping rather than failing on the lookup
            auths = settings.get("auths") or {}
            if p.netloc in auths:
                auth = auths[p.netloc]
                headers["Authorization"] = "{} {}".format(
                    auth["type"], auth["value"])

        r = requests.get(experiment_source, headers=headers, verify=verify_tls)
        if r.status_code != 200:
            raise InvalidSource("Failed to fetch the experiment: {}".format(
                r.text))

        logger.debug("Fetched experiment: \n{}".format(r.text))
        parsed = parse_experiment_from_http(r)
        control.with_state(parsed)
        return parsed
def apply_rollbacks(experiment: Experiment, configuration: Configuration,
                    secrets: Secrets, pool: ThreadPoolExecutor,
                    dry: bool = False) -> List[Run]:
    """
    Run the experiment's rollbacks under the "rollback" control level and
    return their results.

    Falsy entries produced by `run_rollbacks` are dropped. Dict entries are
    kept as they are, any other entry is resolved through its `result()`
    method. When a pool is given, it is shut down (waiting for completion)
    before results are collected.
    """
    logger.info("Let's rollback...")

    with controls(level="rollback", experiment=experiment, context=experiment,
                  configuration=configuration, secrets=secrets) as control:
        # exhaust the generator first so every rollback has been started
        # before we wait on the pool
        pending = list(
            run_rollbacks(experiment, configuration, secrets, pool, dry))

        if pool:
            logger.debug("Waiting for background rollbacks to complete...")
            pool.shutdown(wait=True)

        outcomes = [
            item if isinstance(item, dict) else item.result()
            for item in pending
            if item
        ]

        control.with_state(outcomes)

    return outcomes
def apply_activities(experiment: Experiment, configuration: Configuration,
                     secrets: Secrets, pool: ThreadPoolExecutor,
                     dry: bool = False) -> List[Run]:
    """
    Run the experiment's activities under the "method" control level and
    return their results.

    Falsy entries produced by `run_activities` are dropped. Dict entries are
    kept as they are, any other entry is resolved through its `result()`
    method. When a pool is given, it is shut down (waiting for completion)
    before results are collected.
    """
    with controls(level="method", experiment=experiment, context=experiment,
                  configuration=configuration, secrets=secrets) as control:
        # exhaust the generator first so every activity has been started
        # before we wait on the pool
        pending = list(
            run_activities(experiment, configuration, secrets, pool, dry))

        if pool:
            logger.debug("Waiting for background activities to complete...")
            pool.shutdown(wait=True)

        outcomes = [
            item if isinstance(item, dict) else item.result()
            for item in pending
            if item
        ]

        control.with_state(outcomes)

    return outcomes
def run_steady_state_hypothesis(experiment: Experiment,
                                configuration: Configuration,
                                secrets: Secrets, dry: bool = False):
    """
    Run all probes in the hypothesis and fail the experiment as soon as any of
    the probe fails or is outside the tolerance zone.

    Returns `None` when the experiment declares no steady state hypothesis.
    Otherwise returns a state dict with two keys: `"steady_state_met"`
    (`True` or `False`) and `"probes"` (the runs executed so far, each
    carrying a `"tolerance_met"` flag). In dry mode tolerances are not
    checked and every probe that did not fail is reported as met.
    """
    state = {
        "steady_state_met": None,
        "probes": []
    }
    hypo = experiment.get("steady-state-hypothesis")
    if not hypo:
        logger.info(
            "No steady state hypothesis defined. That's ok, just exploring.")
        return

    logger.info("Steady state hypothesis: {h}".format(h=hypo.get("title")))

    with controls(level="hypothesis", experiment=experiment, context=hypo,
                  configuration=configuration, secrets=secrets) as control:
        probes = hypo.get("probes", [])
        # hand the state over before running: it is mutated in place below,
        # so early returns still expose what was collected
        control.with_state(state)

        for activity in probes:
            run = execute_activity(
                experiment=experiment, activity=activity,
                configuration=configuration, secrets=secrets, dry=dry)

            state["probes"].append(run)

            if run["status"] == "failed":
                run["tolerance_met"] = False
                state["steady_state_met"] = False
                # Logger.warn is a deprecated alias of Logger.warning
                logger.warning("Probe terminated unexpectedly, "
                               "so its tolerance could not be validated")
                return state

            run["tolerance_met"] = True

            if dry:
                # do not check for tolerance when dry mode is on
                continue

            tolerance = activity.get("tolerance")
            logger.debug("allowed tolerance is {t}".format(t=str(tolerance)))
            checked = within_tolerance(
                tolerance, run["output"], configuration=configuration,
                secrets=secrets)
            if not checked:
                run["tolerance_met"] = False
                state["steady_state_met"] = False
                return state

        state["steady_state_met"] = True
        logger.info("Steady state hypothesis is met!")

    return state
def test_controls_are_applied_before_and_but_not_after_experiment():
    """
    With the first control scoped to "before", the before marker must be set
    on the experiment and the after marker must never appear.
    """
    experiment = deepcopy(experiments.ExperimentWithControls)
    first_control = experiment["controls"][0]
    first_control["scope"] = "before"

    with controls("experiment", experiment, context=experiment):
        # the marker is expected as soon as the context has been entered
        assert "before_experiment_control" in experiment
        assert experiment["before_experiment_control"] is True

        experiment["dry"] = True
        run_experiment(experiment)

    assert "after_experiment_control" not in experiment
def test_no_controls_get_applied_when_none_defined():
    """
    An experiment without controls must carry neither the before nor the
    after marker once it has run.
    """
    experiment = deepcopy(experiments.ExperimentWithoutControls)
    experiment["dry"] = True

    with controls("experiment", experiment, context=experiment):
        assert "before_experiment_control" not in experiment

        # the flag is set again inside the context, as in the original test
        experiment["dry"] = True
        run_experiment(experiment)

    assert "after_experiment_control" not in experiment
def test_controls_are_applied_before_and_after_experiment():
    """
    Both the before and after markers must be set on the experiment, and the
    after marker must also be set on the returned journal.
    """
    experiment = deepcopy(experiments.ExperimentWithControls)

    with controls("experiment", experiment, context=experiment):
        # the marker is expected as soon as the context has been entered
        assert "before_experiment_control" in experiment
        assert experiment["before_experiment_control"] is True

        experiment["dry"] = True
        journal = run_experiment(experiment)

    assert "after_experiment_control" in experiment
    assert experiment["after_experiment_control"] is True
    assert journal["after_experiment_control"] is True
def test_controls_are_applied_not_before_and_but_after_experiment():
    """
    With the first control scoped to "after", no before marker must be set
    while the after marker must be set on both experiment and journal.
    """
    experiment = deepcopy(experiments.ExperimentWithControls)
    first_control = experiment["controls"][0]
    first_control["scope"] = "after"

    with controls("experiment", experiment, context=experiment):
        assert "before_experiment_control" not in experiment

        experiment["dry"] = Dry.ACTIVITIES
        journal = run_experiment(experiment)

    assert "after_experiment_control" in experiment
    assert experiment["after_experiment_control"] is True
    assert journal["after_experiment_control"] is True
def test_controls_are_applied_before_and_after_rollbacks():
    """
    Rollback-level controls must set the before and after markers on the
    experiment, and the after marker must show up in the journal's rollbacks.
    """
    experiment = deepcopy(experiments.ExperimentWithControls)

    with controls("rollback", experiment, context=experiment):
        # the marker is expected as soon as the context has been entered
        assert "before_rollback_control" in experiment
        assert experiment["before_rollback_control"] is True

        experiment["dry"] = Dry.ACTIVITIES
        journal = run_experiment(experiment)

    assert "after_rollback_control" in experiment
    assert experiment["after_rollback_control"] is True

    recorded_rollbacks = journal["rollbacks"]
    assert "after_rollback_control" in recorded_rollbacks
def test_controls_are_applied_before_and_after_hypothesis():
    """
    Hypothesis-level controls must set the before and after markers on the
    hypothesis, and the after marker on the journal's "before" steady state.
    """
    experiment = deepcopy(experiments.ExperimentWithControls)
    hypothesis = experiment["steady-state-hypothesis"]

    with controls("hypothesis", experiment, context=hypothesis):
        # the marker is expected as soon as the context has been entered
        assert "before_hypothesis_control" in hypothesis
        assert hypothesis["before_hypothesis_control"] is True

        experiment["dry"] = True
        journal = run_experiment(experiment)

    assert "after_hypothesis_control" in hypothesis
    assert hypothesis["after_hypothesis_control"] is True

    steady_state_before = journal["steady_states"]["before"]
    assert steady_state_before["after_hypothesis_control"] is True
def test_controls_are_applied_before_and_after_activities():
    """
    For every activity of the experiment, the before and after markers must
    be set on the activity, and the after marker on the resulting run.
    """
    experiment = deepcopy(experiments.ExperimentWithControls)
    experiment["dry"] = True

    for activity in get_all_activities(experiment):
        with controls("activity", experiment, context=activity):
            assert activity["before_activity_control"] is True

            # the activity itself is executed with dry=False
            run = execute_activity(
                experiment, activity, None, None, dry=False)

            assert "after_activity_control" in activity
            assert activity["after_activity_control"] is True
            assert run["after_activity_control"] is True
def execute_activity(experiment: Experiment, probe: Probe,
                     configuration: Configuration, secrets: Secrets) -> Run:
    """
    Execute a single activity and return a run dict describing it.

    The run records a copy of the activity, its output, its status
    ("succeeded" or "failed"), the formatted exception on failure, and the
    start/end/duration timing. Optional `pauses` ("before"/"after", passed
    to `time.sleep`) are honoured around the call. When the activity has a
    `ref`, the referenced activity is looked up and used instead;
    `ActivityFailed` is raised if that lookup returns nothing.
    """
    reference = probe.get("ref")
    if reference:
        probe = lookup_activity(reference)
        if not probe:
            raise ActivityFailed(
                "could not find referenced activity '{r}'".format(
                    r=reference))

    with controls(level="activity", experiment=experiment, context=probe,
                  configuration=configuration, secrets=secrets) as control:
        pauses = probe.get("pauses", {})

        delay_before = pauses.get("before")
        if delay_before:
            time.sleep(delay_before)

        started_at = datetime.utcnow()
        run = {"activity": probe.copy(), "output": None}
        output = None

        try:
            output = run_activity(probe, configuration, secrets)
            run.update(output=output, status="succeeded")
        except ActivityFailed as failure:
            run.update(
                status="failed",
                output=output,
                exception=traceback.format_exception(
                    type(failure), failure, None))
        finally:
            # timing is captured before the trailing pause so the pause is
            # not counted in the duration
            ended_at = datetime.utcnow()
            run.update(
                start=started_at.isoformat(),
                end=ended_at.isoformat(),
                duration=(ended_at - started_at).total_seconds())

            delay_after = pauses.get("after")
            if delay_after:
                time.sleep(delay_after)

        control.with_state(run)

    return run
def execute_activity(experiment: Experiment, activity: Activity,
                     configuration: Configuration, secrets: Secrets,
                     dry: bool = False) -> Run:
    """
    Execute a single activity and return a run dict describing it.

    The run records a copy of the activity, its output, its status
    ("succeeded" or "failed"), the formatted exception on failure, and the
    start/end/duration timing. In dry mode the activity is not actually
    called and the pauses are only logged, not slept. When the activity has
    a `ref`, the referenced activity is looked up and used instead;
    `ActivityFailed` is raised if that lookup returns nothing.
    """
    reference = activity.get("ref")
    if reference:
        activity = lookup_activity(reference)
        if not activity:
            raise ActivityFailed(
                "could not find referenced activity '{r}'".format(
                    r=reference))

    with controls(level="activity", experiment=experiment, context=activity,
                  configuration=configuration, secrets=secrets) as control:
        pauses = activity.get("pauses", {})

        delay_before = pauses.get("before")
        if delay_before:
            logger.info("Pausing before next activity for {d}s...".format(
                d=delay_before))
            # dry mode announces the pause but never sleeps
            if not dry:
                time.sleep(delay_before)

        # same announcement either way, background activities get a suffix
        if activity.get("background"):
            announcement = "{t}: {n} [in background]"
        else:
            announcement = "{t}: {n}"
        logger.info(announcement.format(
            t=activity["type"].title(), n=activity.get("name")))

        started_at = datetime.utcnow()
        run = {
            "activity": activity.copy(),
            "output": None
        }
        output = None
        interrupted = False

        try:
            # dry mode skips the call and reports success with no output
            if not dry:
                output = run_activity(activity, configuration, secrets)
            run.update(output=output, status="succeeded")
            if output is None:
                logger.debug(" => succeeded without any result value")
            else:
                logger.debug(" => succeeded with '{r}'".format(r=output))
        except ActivityFailed as failure:
            reason = str(failure)
            run.update(
                status="failed",
                output=output,
                exception=traceback.format_exception(
                    type(failure), failure, None))
            logger.error(" => failed: {x}".format(x=reason))
        finally:
            # timing is captured before the trailing pause so the pause is
            # not counted in the duration
            ended_at = datetime.utcnow()
            run.update(
                start=started_at.isoformat(),
                end=ended_at.isoformat(),
                duration=(ended_at - started_at).total_seconds())

            delay_after = pauses.get("after")
            if delay_after and not interrupted:
                logger.info("Pausing after activity for {d}s...".format(
                    d=delay_after))
                # dry mode announces the pause but never sleeps
                if not dry:
                    time.sleep(delay_after)

        control.with_state(run)

    return run
def test_controls_may_interrupt_experiment():
    """
    Running the interruptible fixture experiment must yield a journal whose
    status is "interrupted".
    """
    experiment = deepcopy(experiments.ExperimentCanBeInterruptedByControl)

    with controls("experiment", experiment, context=experiment):
        experiment["dry"] = True
        journal = run_experiment(experiment)

        final_status = journal["status"]
        assert final_status == "interrupted"
def apply_activities(experiment: Experiment, configuration: Configuration, secrets: Secrets, pool: ThreadPoolExecutor, journal: Journal, dry: bool = False) -> List[Run]:
    """
    Run the experiment's method activities under the "method" control level,
    store the collected runs in `journal["run"]` and return them.

    Iteration stops early once `journal["status"]` is one of "aborted",
    "failed" or "interrupted". Background entries (anything that is not a
    dict) are resolved through `result()`; if that times out, a placeholder
    "failed" run is recorded for the activity at the same index of the
    experiment's method. A `SystemExit` raised while iterating is re-raised
    after deciding whether to wait for the background activities.
    """
    with controls(level="method", experiment=experiment, context=experiment, configuration=configuration, secrets=secrets) as control:
        result = []
        runs = []
        method = experiment.get("method", [])
        wait_for_background_activities = True

        try:
            for run in run_activities(experiment, configuration, secrets, pool, dry):
                runs.append(run)
                # stop scheduling further activities once the journal has
                # been flagged with a terminal status
                if journal["status"] in ["aborted", "failed", "interrupted"]:
                    break
        except SystemExit as x:
            # when we got a signal for an ungraceful exit, we can decide
            # not to wait for background activities. Their statuses will
            # remain failed.
            wait_for_background_activities = x.code != 30  # see exit.py
            raise
        finally:
            # NOTE(review): the nesting of the statements below inside this
            # `finally` is reconstructed from a whitespace-collapsed source;
            # confirm against the original layout.

            # None means result() below blocks until the future completes
            background_activity_timeout = None

            if wait_for_background_activities and pool:
                logger.debug("Waiting for background activities to complete")
                pool.shutdown(wait=True)
            elif pool:
                harshly_terminate_pending_background_activities(pool)
                logger.debug(
                    "Do not wait for the background activities to finish "
                    "as per signal")
                # only give each pending future a short grace period
                background_activity_timeout = 0.2
                pool.shutdown(wait=False)

            for index, run in enumerate(runs):
                if not run:
                    continue

                if isinstance(run, dict):
                    result.append(run)
                else:
                    try:
                        # background activities
                        result.append(
                            run.result(timeout=background_activity_timeout))
                    # NOTE(review): whether this is the builtin TimeoutError
                    # or concurrent.futures.TimeoutError depends on the
                    # file's imports, which are not visible here - confirm
                    except TimeoutError:
                        # we want an entry for the background activity in our
                        # results anyway, we won't have anything meaningful
                        # to say about it
                        result.append({
                            "activity": method[index],
                            "status": "failed",
                            "output": None,
                            "duration": None,
                            "start": None,
                            "end": None,
                            "exception": None
                        })

            # now let's ensure the journal has all activities in their correct
            # order (background ones included)
            journal["run"] = result

            control.with_state(result)

    return result
def execute_activity(
    experiment: Experiment,
    activity: Activity,
    configuration: Configuration,
    secrets: Secrets,
    dry: Dry,
) -> Run:
    """
    Execute a single activity and return a run dict describing it.

    The run records a copy of the activity, its output, its status
    ("succeeded" or "failed"), the formatted exception on failure, and the
    start/end/duration timing. An activity-level `dry` entry overrides the
    `dry` argument. Depending on that value the activity call is skipped
    (actions only, probes only, or all activities) and pauses are logged
    without sleeping. When the activity has a `ref`, the referenced activity
    is looked up and used instead; `ActivityFailed` is raised if that lookup
    returns nothing.
    """
    ref = activity.get("ref")
    if ref:
        activity = lookup_activity(ref)
        if not activity:
            raise ActivityFailed(f"could not find referenced activity '{ref}'")

    with controls(
        level="activity",
        experiment=experiment,
        context=activity,
        configuration=configuration,
        secrets=secrets,
    ) as control:
        dry = activity.get("dry", dry)
        pauses = substitute(
            activity.get("pauses", {}), configuration, secrets)
        pause_before = pauses.get("before")

        # the activity is skipped when the dry strategy targets its type,
        # or targets every activity
        activity_type = activity["type"]
        is_dry = (
            (dry == Dry.ACTIONS and activity_type == "action")
            or (dry == Dry.PROBES and activity_type == "probe")
            or dry == Dry.ACTIVITIES
        )

        # pauses are only slept when neither the pause-specific dry strategy
        # nor a dry strategy covering this activity is in effect
        should_sleep = dry != Dry.PAUSE and not is_dry

        if pause_before:
            logger.info(f"Pausing before next activity for {pause_before}s...")
            if should_sleep:
                time.sleep(pause_before)

        # same announcement either way, background activities get a suffix
        if activity.get("background"):
            announcement = "{t}: {n} [in background]"
        else:
            announcement = "{t}: {n}"
        logger.info(announcement.format(
            t=activity["type"].title(), n=activity.get("name")))

        start = datetime.utcnow()
        run = {"activity": activity.copy(), "output": None}
        result = None
        interrupted = False

        try:
            # a dry activity is not called and reports success with no output
            if not is_dry:
                result = run_activity(activity, configuration, secrets)
            run.update(output=result, status="succeeded")
            if result is None:
                logger.debug(" => succeeded without any result value")
            else:
                logger.debug(f" => succeeded with '{result}'")
        except ActivityFailed as failure:
            error_msg = str(failure)
            run.update(
                status="failed",
                output=result,
                exception=traceback.format_exception(
                    type(failure), failure, None),
            )
            logger.error(f" => failed: {error_msg}")
        finally:
            # timing is captured before the trailing pause so the pause is
            # not counted in the duration
            end = datetime.utcnow()
            run.update(
                start=start.isoformat(),
                end=end.isoformat(),
                duration=(end - start).total_seconds(),
            )

            pause_after = pauses.get("after")
            if pause_after and not interrupted:
                logger.info(f"Pausing after activity for {pause_after}s...")
                if should_sleep:
                    time.sleep(pause_after)

        control.with_state(run)

    return run