def run_http_activity(activity: Activity, configuration: Configuration,
                      secrets: Secrets) -> Any:
    """
    Run a HTTP activity.

    A HTTP activity is a call to a HTTP endpoint. The provider's `url` and
    `headers` always go through :func:`substitute`; its `arguments` do so
    only when a configuration or secrets are given. A `GET` sends the
    arguments as query parameters, any other method sends them as the
    request data.

    Returns a dictionary with the response `status` code, its `headers`
    and its `body`. The body is the decoded JSON payload when the response
    `Content-Type` is exactly `application/json`, the response text
    otherwise.

    Raises :exc:`FailedActivity` when the connection cannot be established
    or when the request times out. The response status is not inspected:
    responses in the 400 or 500 ranges are returned like any other.

    This should be considered as a private function.
    """
    provider = activity["provider"]
    url = substitute(provider["url"], configuration, secrets)
    method = provider.get("method", "GET").upper()
    headers = substitute(provider.get("headers", None), configuration, secrets)
    timeout = provider.get("timeout", None)
    arguments = provider.get("arguments", {})
    verify_tls = provider.get("verify_tls", True)

    if configuration or secrets:
        arguments = substitute(arguments, configuration, secrets)

    try:
        if method == "GET":
            r = requests.get(
                url, params=arguments, headers=headers, timeout=timeout,
                verify=verify_tls)
        else:
            r = requests.request(
                method, url, data=arguments, headers=headers,
                timeout=timeout, verify=verify_tls)

        # strict comparison: a content type carrying parameters (such as a
        # charset) is handed back as plain text
        if r.headers.get("Content-Type") == "application/json":
            body = r.json()
        else:
            body = r.text

        return {
            "status": r.status_code,
            "headers": dict(**r.headers),
            "body": body
        }
    except requests.exceptions.ConnectionError as cex:
        # must stay before the Timeout handler so the original precedence
        # between both handlers is preserved
        raise FailedActivity("failed to connect to {u}: {x}".format(
            u=url, x=str(cex))) from cex
    except requests.exceptions.Timeout as tex:
        raise FailedActivity("activity took too long to complete") from tex
def run_python_activity(activity: Activity, configuration: Configuration,
                        secrets: Secrets) -> Any:
    """
    Run a Python activity.

    A python activity is a function from any importable module. The
    provider names the `module` and the `func` to call, as well as the
    optional `arguments` passed as keyword arguments. The result of that
    function is returned as the activity's output.

    When the function signature declares a `secrets` parameter and the
    provider lists secrets scopes, the entries of those scopes are merged
    into a single dictionary and passed along. When the signature declares
    a `configuration` parameter, a copy of the configuration is passed. A
    missing (`None`) configuration or secrets is passed as an empty
    dictionary rather than failing.

    Raises :exc:`FailedActivity` when the function raises any exception;
    the original traceback is attached to it.

    This should be considered as a private function.
    """
    provider = activity["provider"]
    mod_path = provider["module"]
    func_name = provider["func"]
    mod = importlib.import_module(mod_path)
    func = getattr(mod, func_name)

    # shallow copy so the keys injected below never leak into the provider
    arguments = provider.get("arguments", {}).copy()
    if configuration or secrets:
        arguments = substitute(arguments, configuration, secrets)

    sig = inspect.signature(func)
    if "secrets" in provider and "secrets" in sig.parameters:
        available_secrets = secrets or {}
        merged_secrets = {}
        for scope in provider["secrets"]:
            merged_secrets.update(available_secrets.get(scope, {}))
        arguments["secrets"] = merged_secrets

    if "configuration" in sig.parameters:
        # the function gets its own copy, never the caller's mapping
        arguments["configuration"] = (configuration or {}).copy()

    try:
        return func(**arguments)
    except Exception as x:
        raise FailedActivity(
            traceback.format_exception_only(
                type(x), x)[0].strip()).with_traceback(sys.exc_info()[2])
def apply_python_control(level: str, control: Control,
                         experiment: Experiment,
                         context: Union[Activity, Experiment],
                         state: Union[Journal, Run, List[Run]] = None,
                         configuration: Configuration = None,
                         secrets: Secrets = None) -> None:
    """
    Apply a control by calling a function matching the given level.

    The function name is looked up from the level and loaded from the
    control; when no function is found, nothing happens. The provider's
    `arguments` are deep-copied, substituted when a configuration or
    secrets are given, and passed as keyword arguments next to `context`.

    The `secrets`, `configuration`, `state` and `experiment` keyword
    arguments are only passed when the function signature declares them
    (and, for `secrets`, when the provider lists secrets scopes). A missing
    (`None`) configuration or secrets, which is the default for both
    parameters, is passed as an empty dictionary rather than failing.
    """
    provider = control["provider"]
    func_name = _level_mapping.get(level)
    func = load_func(control, func_name)
    if not func:
        return

    arguments = deepcopy(provider.get("arguments", {}))

    if configuration or secrets:
        arguments = substitute(arguments, configuration, secrets)

    sig = inspect.signature(func)
    if "secrets" in provider and "secrets" in sig.parameters:
        available_secrets = secrets or {}
        merged_secrets = {}
        for scope in provider["secrets"]:
            merged_secrets.update(available_secrets.get(scope, {}))
        arguments["secrets"] = merged_secrets

    if "configuration" in sig.parameters:
        # the control gets its own copy, never the caller's mapping
        arguments["configuration"] = (configuration or {}).copy()

    if "state" in sig.parameters:
        arguments["state"] = state

    if "experiment" in sig.parameters:
        arguments["experiment"] = experiment

    func(context=context, **arguments)
def run_process_activity(activity: Activity, configuration: Configuration,
                         secrets: Secrets) -> Any:
    """
    Run a process activity.

    A process activity is an executable the current user is allowed to
    apply. The provider `path` has its `~` expanded and is then resolved
    with :func:`shutil.which`. The provider `arguments` may be:

    * a string: it is appended to the resolved path and the whole command
      line is run through the shell
    * a list: each item is turned into a string, `None` and empty strings
      are dropped
    * a dictionary: flattened into key, value, key, value... and then
      handled like a list

    Returns a dictionary with the process exit code as `status` and its
    decoded `stdout` and `stderr`.

    Raises :exc:`ActivityFailed` when the executable cannot be found or
    when the process takes longer than the timeout defined in the activity.
    There is no timeout by default so be careful when you do not explicitly
    provide one.

    This should be considered as a private function.
    """
    provider = activity["provider"]
    timeout = provider.get("timeout", None)
    arguments = provider.get("arguments", [])

    if arguments and (configuration or secrets):
        arguments = substitute(arguments, configuration, secrets)

    path = shutil.which(os.path.expanduser(provider["path"]))
    if path is None:
        # without this guard, subprocess would fail on a `None` argument or
        # the shell would be asked to run the literal command "None"
        raise ActivityFailed(
            "could not find the executable '{p}' of the process "
            "activity".format(p=provider["path"]))

    shell = False
    if isinstance(arguments, str):
        # NOTE: the command line is interpreted by the shell, the arguments
        # string must therefore come from a trusted experiment
        shell = True
        arguments = "{} {}".format(path, arguments)
    else:
        if isinstance(arguments, dict):
            arguments = itertools.chain.from_iterable(arguments.items())
        arguments = [str(p) for p in arguments if p not in (None, "")]
        arguments.insert(0, path)

    try:
        logger.debug("Running: {a}".format(a=str(arguments)))
        proc = subprocess.run(
            arguments, timeout=timeout, stdout=subprocess.PIPE,
            stderr=subprocess.PIPE, env=os.environ, shell=shell)
    except subprocess.TimeoutExpired as x:
        raise ActivityFailed(
            "process activity took too long to complete") from x

    # kind warning to the user that this process returned a non-zero
    # exit code, as traditionally used to indicate a failure,
    # but not during the hypothesis check because that could also be
    # exactly what the user want. This warning is helpful during the
    # method and rollbacks
    if "tolerance" not in activity and proc.returncode > 0:
        logger.warning(
            "This process returned a non-zero exit code. "
            "This may indicate some error and not what you expected. "
            "Please have a look at the logs.")

    stdout = decode_bytes(proc.stdout)
    stderr = decode_bytes(proc.stderr)

    return {"status": proc.returncode, "stdout": stdout, "stderr": stderr}
def test_use_nested_object_as_substitution():
    """A lone ``${nested}`` placeholder resolves to the nested mapping."""
    raw_settings = {
        "nested": {
            "onea": "fdsfdsf",
            "lol": {"haha": [1, 2, 3]},
        }
    }
    loaded = load_configuration(raw_settings)

    resolved = substitute("${nested}", configuration=loaded, secrets=None)

    assert isinstance(resolved, dict)
    assert resolved == {"onea": "fdsfdsf", "lol": {"haha": [1, 2, 3]}}
def run_process_activity(activity: Activity, configuration: Configuration,
                         secrets: Secrets) -> Any:
    """
    Run a process activity.

    A process activity is an executable the current user is allowed to
    apply. The provider `path` is resolved with :func:`shutil.which`. The
    provider `arguments` may be:

    * a string: it is appended to the resolved path and the whole command
      line is run through the shell
    * a list: each item is turned into a string, `None` and empty strings
      are dropped
    * a dictionary: flattened into key, value, key, value... and then
      handled like a list

    Returns a dictionary with the process exit code as `status` and its
    decoded `stdout` and `stderr`.

    Raises :exc:`ActivityFailed` when the executable cannot be found or
    when the process takes longer than the timeout defined in the activity.
    There is no timeout by default so be careful when you do not explicitly
    provide one.

    This should be considered as a private function.
    """
    provider = activity["provider"]
    timeout = provider.get("timeout", None)
    arguments = provider.get("arguments", [])

    if arguments and (configuration or secrets):
        arguments = substitute(arguments, configuration, secrets)

    path = shutil.which(provider["path"])
    if path is None:
        # without this guard, subprocess would fail on a `None` argument or
        # the shell would be asked to run the literal command "None"
        raise ActivityFailed(
            "could not find the executable '{p}' of the process "
            "activity".format(p=provider["path"]))

    shell = False
    if isinstance(arguments, str):
        # NOTE: the command line is interpreted by the shell, the arguments
        # string must therefore come from a trusted experiment
        shell = True
        arguments = "{} {}".format(path, arguments)
    else:
        if isinstance(arguments, dict):
            arguments = itertools.chain.from_iterable(arguments.items())
        arguments = [str(p) for p in arguments if p not in (None, "")]
        arguments.insert(0, path)

    try:
        logger.debug("Running: {a}".format(a=str(arguments)))
        proc = subprocess.run(
            arguments, timeout=timeout, stdout=subprocess.PIPE,
            stderr=subprocess.PIPE, env=os.environ, shell=shell)
    except subprocess.TimeoutExpired as x:
        raise ActivityFailed(
            "process activity took too long to complete") from x

    stdout = decode_bytes(proc.stdout)
    stderr = decode_bytes(proc.stderr)

    return {
        "status": proc.returncode,
        "stdout": stdout,
        "stderr": stderr
    }
def run_process_activity(activity: Activity, configuration: Configuration,
                         secrets: Secrets) -> Any:
    """
    Run a process activity.

    A process activity is an executable the current user is allowed to
    apply. The provider `path` is resolved with :func:`shutil.which` and
    the provider `arguments` are appended to it: each item is turned into a
    string, `None` and empty strings are dropped. A dictionary of arguments
    is first flattened into key, value, key, value...

    Returns a tuple made of the process exit code, its standard output and
    its standard error, both decoded as UTF-8.

    Raises :exc:`FailedActivity` when the executable cannot be found or
    when the process takes longer than the timeout defined in the activity.
    There is no timeout by default so be careful when you do not explicitly
    provide one.

    This should be considered as a private function.
    """
    provider = activity["provider"]
    timeout = provider.get("timeout", None)
    arguments = provider["arguments"]

    if configuration or secrets:
        arguments = substitute(arguments, configuration, secrets)

    if isinstance(arguments, dict):
        chain = itertools.chain.from_iterable(arguments.items())
    else:
        chain = arguments

    args = [str(p) for p in chain if p not in (None, "")]

    path = shutil.which(provider["path"])
    if path is None:
        # a `None` entry would otherwise break the join below with an
        # unrelated TypeError
        raise FailedActivity(
            "could not find the executable '{p}' of the process "
            "activity".format(p=provider["path"]))
    args.insert(0, path)

    try:
        logger.debug("Running: {a}".format(a=" ".join(args)))
        proc = subprocess.run(
            args, timeout=timeout, stdout=subprocess.PIPE,
            stderr=subprocess.PIPE, env=os.environ)
    except subprocess.TimeoutExpired as x:
        raise FailedActivity(
            "process activity took too long to complete") from x

    return (proc.returncode, proc.stdout.decode('utf-8'),
            proc.stderr.decode('utf-8'))
def test_use_integer_as_substitution():
    """A lone ``${value}`` placeholder keeps the integer type of its value."""
    loaded = load_configuration({"value": 8})

    resolved = substitute("${value}", configuration=loaded, secrets=None)

    assert isinstance(resolved, int)
    assert resolved == 8
def test_substitute_strings_from_configuration():
    """A plain string template is resolved against the configuration."""
    template = "hello ${name}"

    greeting = substitute(template, config.SomeConfig, None)

    assert greeting == "hello Jane"
def test_do_not_fail_when_key_is_missing():
    """An unknown placeholder is left untouched instead of raising."""
    payload = {"message": "hello ${firstname}"}

    resolved = substitute(payload, config.SomeConfig, None)

    assert resolved["message"] == "hello ${firstname}"
def test_substitute_from_config_and_secrets_with_priority_to_config():
    """When both define the key, the configuration wins over the secrets."""
    payload = {"message": "hello ${name}"}
    competing_secrets = {"ident": {"name": "Joe"}}

    resolved = substitute(payload, config.SomeConfig, competing_secrets)

    assert resolved["message"] == "hello Jane"
def test_substitute_from_secrets():
    """Without any configuration, the value is taken from the secrets."""
    payload = {"message": "hello ${name}"}
    only_secrets = {"ident": {"name": "Joe"}}

    resolved = substitute(payload, None, only_secrets)

    assert resolved["message"] == "hello Joe"
def _run(  # noqa: C901
        self, strategy: Strategy, schedule: Schedule,
        experiment: Experiment, journal: Journal,
        configuration: Configuration, secrets: Secrets,
        settings: Settings,
        event_registry: EventHandlerRegistry) -> Journal:
    """
    Run the experiment from start to finish and return its journal.

    The experiment title is substituted, the journal is initialized when
    none is given, and the global and experiment controls are set up. The
    steady-state hypothesis is then played before, during and/or after the
    method, as decided by the strategy helpers, provided the experiment
    has a hypothesis with probes. Rollbacks are played afterwards unless an
    exit signal with code 30 was received.

    An :exc:`InterruptExecution`, a `KeyboardInterrupt` or a `SystemExit`
    raised while running is recorded as the `"interrupted"` status of the
    journal and does not propagate.

    Whatever happens, the controls are cleaned up and the event registry
    is notified that the run finished.
    """
    experiment["title"] = substitute(experiment["title"], configuration, secrets)
    logger.info("Running experiment: {t}".format(t=experiment["title"]))

    started_at = time.time()
    # reuse the journal handed over by the caller, create one otherwise
    journal = journal or initialize_run_journal(experiment)
    event_registry.started(experiment, journal)

    control = Control()
    activity_pool, rollback_pool = get_background_pools(experiment)
    hypo_pool = get_hypothesis_pool()
    # set once the method is done, see below
    continous_hypo_event = threading.Event()

    dry = experiment.get("dry", False)
    if dry:
        logger.warning("Dry mode enabled")

    initialize_global_controls(experiment, configuration, secrets, settings)
    initialize_controls(experiment, configuration, secrets)

    logger.info("Steady-state strategy: {}".format(strategy.value))
    rollback_strategy = settings.get("runtime", {}).get("rollbacks", {}).get(
        "strategy", "default")
    logger.info("Rollbacks strategy: {}".format(rollback_strategy))

    # only a SystemExit carrying the code 30 turns this off
    exit_gracefully_with_rollbacks = True
    with_ssh = has_steady_state_hypothesis_with_probes(experiment)
    if not with_ssh:
        logger.info("No steady state hypothesis defined. That's ok, just "
                    "exploring.")

    try:
        try:
            control.begin("experiment", experiment, experiment, configuration, secrets)

            # placeholder that is not None, so the method still runs when
            # the gate hypothesis is not played
            state = object()
            if with_ssh and should_run_before_method(strategy):
                state = run_gate_hypothesis(experiment, journal, configuration, secrets, event_registry, dry)

            # a None state from the gate hypothesis skips the method
            if state is not None:
                if with_ssh and should_run_during_method(strategy):
                    run_hypothesis_during_method(hypo_pool, continous_hypo_event, strategy, schedule, experiment, journal, configuration, secrets, event_registry, dry)

                state = run_method(strategy, activity_pool, experiment, journal, configuration, secrets, event_registry, dry)

                continous_hypo_event.set()
                if journal["status"] not in ["interrupted", "aborted"]:
                    if with_ssh and (state is not None) and \
                            should_run_after_method(strategy):
                        run_deviation_validation_hypothesis(
                            experiment, journal, configuration, secrets, event_registry, dry)
        except InterruptExecution as i:
            journal["status"] = "interrupted"
            logger.fatal(str(i))
            event_registry.interrupted(experiment, journal)
        except KeyboardInterrupt:
            journal["status"] = "interrupted"
            logger.warning("Received a termination signal (Ctrl-C)...")
            event_registry.signal_exit()
        except SystemExit as x:
            journal["status"] = "interrupted"
            logger.warning("Received the exit signal: {}".format(x.code))

            # the exit code 30 asks for the rollbacks to be skipped
            exit_gracefully_with_rollbacks = x.code != 30
            if not exit_gracefully_with_rollbacks:
                logger.warning("Ignoring rollbacks as per signal")
            event_registry.signal_exit()
        finally:
            hypo_pool.shutdown(wait=True)

        # just in case a signal overrode everything else to tell us not to
        # play them anyway (see the exit.py module)
        if exit_gracefully_with_rollbacks:
            run_rollback(rollback_strategy, rollback_pool, experiment, journal, configuration, secrets, event_registry, dry)

        journal["end"] = datetime.utcnow().isoformat()
        journal["duration"] = time.time() - started_at

        # the spec only allows these statuses, so if it's anything else
        # we override to "completed"
        if journal["status"] not in ("completed", "failed", "aborted",
                                     "interrupted"):
            journal["status"] = "completed"

        has_deviated = journal["deviated"]
        # "deviated" is only reported in the log line, the journal status
        # itself is left as-is
        status = "deviated" if has_deviated else journal["status"]
        logger.info("Experiment ended with status: {s}".format(s=status))
        if has_deviated:
            logger.info(
                "The steady-state has deviated, a weakness may have been "
                "discovered")

        control.with_state(journal)
        try:
            control.end("experiment", experiment, experiment, configuration, secrets)
        except ChaosException:
            # closing the controls is best effort, only traced at debug level
            logger.debug("Failed to close controls", exc_info=True)
    finally:
        try:
            cleanup_controls(experiment)
            cleanup_global_controls()
        finally:
            # always notified, even when the cleanup above fails
            event_registry.finish(journal)

    return journal
def _(tolerance: dict, value: Any, configuration: Configuration = None,
      secrets: Secrets = None) -> bool:
    """
    Check a value against a tolerance declared as a dictionary.

    The `type` of the tolerance selects the check:

    * `"probe"`: the tolerance is run as an activity which receives the
      value as its `value` argument; the truthiness of its result is the
      outcome, a failed activity means out of tolerance
    * `"regex"`: the substituted `pattern` is searched in the value
    * `"jsonpath"`: the substituted `path` must match the value, which is
      decoded from JSON first when it is a string or bytes. An optional
      `count` pins the number of matches, an optional `expect` pins the
      matched values
    * `"range"`: the value, converted to a decimal, must lie within the
      two bounds (inclusive) of `range`. A value that cannot be converted
      is out of tolerance

    For all but the probe type, an optional `target` names the key of the
    value to check instead of the value itself.

    Returns `False` for any other tolerance type.
    """
    tolerance_type = tolerance.get("type")

    if tolerance_type == "probe":
        # the provider may not declare any arguments of its own
        tolerance["provider"].setdefault("arguments", {})["value"] = value
        try:
            return bool(run_activity(tolerance, configuration, secrets))
        except ActivityFailed:
            return False

    elif tolerance_type == "regex":
        target = tolerance.get("target")
        pattern = tolerance.get("pattern")
        pattern = substitute(pattern, configuration, secrets)
        logger.debug("Applied pattern is: {}".format(pattern))
        rx = re.compile(pattern)
        if target:
            value = value.get(target, value)
        return rx.search(value) is not None

    elif tolerance_type == "jsonpath":
        target = tolerance.get("target")
        path = tolerance.get("path")
        count_value = tolerance.get("count", None)
        path = substitute(path, configuration, secrets)
        logger.debug("Applied jsonpath is: {}".format(path))
        px = JSONPath.parse_str(path)

        # if no target was provided, we use the tested value as-is
        if target:
            value = value.get(target, value)

        if isinstance(value, bytes):
            value = value.decode('utf-8')

        if isinstance(value, str):
            try:
                value = json.loads(value)
            except json.decoder.JSONDecodeError:
                # not JSON after all, match against the raw string
                pass

        values = list(map(lambda m: m.current_value, px.match(value)))

        result = len(values) > 0
        if count_value is not None:
            result = len(values) == count_value

        # `expect` may legitimately be None, hence the membership test
        expect = tolerance.get("expect")
        if "expect" in tolerance:
            if not isinstance(expect, list):
                result = values == [expect]
            else:
                result = values == expect

        if result is False:
            if "expect" in tolerance:
                logger.debug("jsonpath found '{}' but expected '{}'".format(
                    str(values), str(tolerance["expect"])))
            else:
                logger.debug("jsonpath found '{}'".format(str(values)))

        return result

    elif tolerance_type == "range":
        target = tolerance.get("target")
        if target:
            value = value.get(target, value)

        try:
            value = Decimal(value)
        except (InvalidOperation, TypeError, ValueError):
            # InvalidOperation: not a number literal; TypeError/ValueError:
            # not something a decimal can be built from (None, a list...)
            logger.debug("range check expects a number value")
            return False

        the_range = tolerance.get("range")
        min_value = the_range[0]
        max_value = the_range[1]
        return Decimal(min_value) <= value <= Decimal(max_value)

    # unknown tolerance type: keep the result falsy, as a boolean
    return False
def execute_activity(
    experiment: Experiment,
    activity: Activity,
    configuration: Configuration,
    secrets: Secrets,
    dry: Dry,
) -> Run:
    """
    Run a single activity and collect the meta data of that run.

    A referenced activity (`ref`) is looked up first; :exc:`ActivityFailed`
    is raised when it cannot be found. The activity is then run within its
    controls. The returned run holds a copy of the activity, its `output`,
    its `status` (`"succeeded"` or `"failed"`), its `start`, `end` and
    `duration`, and the formatted `exception` when the activity failed.

    The `before` and `after` pauses of the activity are logged, but the
    actual sleep is skipped when pauses are dried or when the activity
    itself is dried. A dried activity is not run at all and is recorded as
    succeeded without output.
    """
    reference = activity.get("ref")
    if reference:
        activity = lookup_activity(reference)
        if not activity:
            raise ActivityFailed(
                f"could not find referenced activity '{reference}'")

    with controls(
        level="activity",
        experiment=experiment,
        context=activity,
        configuration=configuration,
        secrets=secrets,
    ) as control:
        # the activity may override the dry mode it was called with
        dry = activity.get("dry", dry)
        pauses = substitute(activity.get("pauses", {}), configuration, secrets)
        pause_before = pauses.get("before")

        kind = activity["type"]
        is_dry = (
            (dry == Dry.ACTIONS and kind == "action")
            or (dry == Dry.PROBES and kind == "probe")
            or dry == Dry.ACTIVITIES
        )
        # sleeping only makes sense when neither pauses nor the activity
        # are dried
        may_sleep = dry != Dry.PAUSE and not is_dry

        if pause_before:
            logger.info(f"Pausing before next activity for {pause_before}s...")
            if may_sleep:
                time.sleep(pause_before)

        if activity.get("background"):
            template = "{t}: {n} [in background]"
        else:
            template = "{t}: {n}"
        logger.info(
            template.format(t=activity["type"].title(), n=activity.get("name")))

        start = datetime.utcnow()
        run = {"activity": activity.copy(), "output": None}
        result = None
        try:
            # a dried activity is recorded but never actually run
            if not is_dry:
                result = run_activity(activity, configuration, secrets)
            run["output"] = result
            run["status"] = "succeeded"
            if result is None:
                logger.debug(" => succeeded without any result value")
            else:
                logger.debug(f" => succeeded with '{result}'")
        except ActivityFailed as failure:
            error_msg = str(failure)
            run["status"] = "failed"
            run["output"] = result
            run["exception"] = traceback.format_exception(
                type(failure), failure, None)
            logger.error(f" => failed: {error_msg}")
        finally:
            # capture the end time before we pause
            end = datetime.utcnow()
            run["start"] = start.isoformat()
            run["end"] = end.isoformat()
            run["duration"] = (end - start).total_seconds()

            pause_after = pauses.get("after")
            if pause_after:
                logger.info(f"Pausing after activity for {pause_after}s...")
                if may_sleep:
                    time.sleep(pause_after)

        control.with_state(run)

    return run
def test_always_return_to_string_when_pattern_is_not_alone():
    """A placeholder embedded in a longer string is rendered as text."""
    loaded = load_configuration({"value": 8})

    rendered = substitute("hello ${value}", configuration=loaded, secrets=None)

    assert isinstance(rendered, str)
    assert rendered == "hello 8"
def run_http_activity(activity: Activity, configuration: Configuration,
                      secrets: Secrets) -> Any:
    """
    Run a HTTP activity.

    A HTTP activity is a call to a HTTP endpoint. The provider's `url` and
    `headers` always go through :func:`substitute`; its `arguments` do so
    only when they are set and a configuration or secrets are given. A
    `GET` sends the arguments as query parameters. Any other method sends
    them as a JSON payload when the request `Content-Type` header is
    exactly `application/json`, as the request data otherwise. A `timeout`
    given as a list is turned into a tuple before being passed on.

    Returns a dictionary with the response `status` code, its `headers`
    and its `body`. The body is the decoded JSON payload when the response
    `Content-Type` is exactly `application/json`, the response text
    otherwise.

    Raises :exc:`ActivityFailed` when the connection cannot be established
    or when the request times out. A response status in the 400 or 500
    ranges does not raise: it is returned like any other and only logged
    as a warning, unless the activity declares a tolerance.

    This should be considered as a private function.
    """
    provider = activity["provider"]
    url = substitute(provider["url"], configuration, secrets)
    method = provider.get("method", "GET").upper()
    headers = substitute(provider.get("headers", None), configuration, secrets)
    timeout = provider.get("timeout", None)
    arguments = provider.get("arguments", None)
    verify_tls = provider.get("verify_tls", True)

    if arguments and (configuration or secrets):
        arguments = substitute(arguments, configuration, secrets)

    if isinstance(timeout, list):
        timeout = tuple(timeout)

    # options shared by every flavour of the request below
    common = {"headers": headers, "timeout": timeout, "verify": verify_tls}

    try:
        if method == "GET":
            r = requests.get(url, params=arguments, **common)
        elif headers and headers.get("Content-Type") == "application/json":
            r = requests.request(method, url, json=arguments, **common)
        else:
            r = requests.request(method, url, data=arguments, **common)

        # strict comparison: a content type carrying parameters (such as a
        # charset) is handed back as plain text
        if r.headers.get("Content-Type") == "application/json":
            body = r.json()
        else:
            body = r.text

        # kind warning to the user that this HTTP call may be invalid
        # but not during the hypothesis check because that could also be
        # exactly what the user want. This warning is helpful during the
        # method and rollbacks
        if "tolerance" not in activity and r.status_code > 399:
            logger.warning(
                "This HTTP call returned a response with a HTTP status code "
                "above 400. This may indicate some error and not "
                "what you expected. Please have a look at the logs.")

        return {
            "status": r.status_code,
            "headers": dict(**r.headers),
            "body": body
        }
    except requests.exceptions.ConnectionError as cex:
        # must stay before the Timeout handler so the original precedence
        # between both handlers is preserved
        raise ActivityFailed("failed to connect to {u}: {x}".format(
            u=url, x=str(cex))) from cex
    except requests.exceptions.Timeout as tex:
        raise ActivityFailed("activity took too long to complete") from tex
def test_substitute_from_configuration():
    """A placeholder inside a dictionary value is taken from the configuration."""
    payload = {"message": "hello ${name}"}

    resolved = substitute(payload, config.SomeConfig, None)

    assert resolved["message"] == "hello Jane"