def initialize_global_controls(experiment: Experiment,
                               configuration: Configuration,
                               secrets: Secrets, settings: Settings):
    """
    Initialize the controls declared under the `controls` entry of the
    settings and register the resulting list as the global controls.

    Each declared control gets its settings key stored under `name`. Only
    controls whose provider type is `python` go through
    `initialize_control`. When that call raises, the failure is logged at
    debug level and the control is left out of the registered list, so it
    is not applied during the experiment.
    """
    registered = []

    for name, control in settings.get("controls", {}).items():
        control['name'] = name
        logger.debug("Initializing global control '{}'".format(name))

        provider = control.get("provider")
        if not provider or provider["type"] != "python":
            # no initialization step for this provider, keep it as declared
            registered.append(control)
            continue

        try:
            initialize_control(
                control, experiment=experiment, configuration=configuration,
                secrets=secrets, settings=settings)
        except Exception:
            # a control that failed its initialization is never registered
            logger.debug(
                "Control initialization '{}' failed. "
                "It will not be registered.".format(
                    control['name']), exc_info=True)
        else:
            registered.append(control)

    set_global_controls(registered)
def load_experiment(experiment_source: str, settings: Settings = None,
                    verify_tls: bool = True) -> Experiment:
    """
    Load an experiment from the given source, either a local file or an
    HTTP(s) URL, within the `loader` controls context.

    When the endpoint requires authentication, declare it in the settings
    file under the `auths:` section, keyed by domain. For instance:

    ```yaml
    auths:
      mydomain.com:
        type: basic
        value: XYZ
      otherdomain.com:
        type: bearer
        value: UIY
      localhost:8081:
        type: digest
        value: UIY
    ```

    Pass `verify_tls=False` when the source is served over HTTPS with a
    self-signed certificate, so that the certificate is not verified.

    Raises `InvalidSource` when the path does not exist, when the scheme is
    neither `http` nor `https`, or when the response status is not 200.
    """
    with controls(level="loader", context=experiment_source) as control:
        # local file: no network access needed
        if os.path.exists(experiment_source):
            experiment = parse_experiment_from_file(experiment_source)
            control.with_state(experiment)
            return experiment

        url = urlparse(experiment_source)
        if not (url.scheme or os.path.exists(url.path)):
            raise InvalidSource(
                'Path "{}" does not exist.'.format(url.path))

        if url.scheme not in ("http", "https"):
            raise InvalidSource(
                "'{}' is not a supported source scheme.".format(url.scheme))

        request_headers = {"Accept": "application/json, application/x-yaml"}
        if settings:
            auths = settings.get("auths", [])
            # credentials are keyed by the exact network location of the URL
            if url.netloc in auths:
                credentials = auths[url.netloc]
                request_headers["Authorization"] = "{} {}".format(
                    credentials["type"], credentials["value"])

        response = requests.get(
            experiment_source, headers=request_headers, verify=verify_tls)
        if response.status_code != 200:
            raise InvalidSource(
                "Failed to fetch the experiment: {}".format(response.text))

        logger.debug("Fetched experiment: \n{}".format(response.text))
        experiment = parse_experiment_from_http(response)
        control.with_state(experiment)
        return experiment
def configure_control(config: Configuration, secrets: Secrets,
                      settings: Settings):
    """
    Store the `dummy-key` value in the module-level `value_from_config`.

    The value is read from `config` when it is non-empty, otherwise from
    `settings` when that one is non-empty, falling back to `"default"` when
    the key is absent. When both are empty the global is left untouched.
    """
    global value_from_config

    # first non-empty mapping wins, configuration before settings
    for source in (config, settings):
        if source:
            value_from_config = source.get("dummy-key", "default")
            break
def get_verify_tls(settings: Settings) -> str:
    """
    Return the `verify_tls` argument declared for the `chaosiq-cloud`
    control provider in the settings.

    The value is returned as stored; `None` is returned when any level of
    the `controls > chaosiq-cloud > provider > arguments` path is missing.
    """
    control = settings.get('controls', {}).get('chaosiq-cloud', {})
    arguments = control.get('provider', {}).get('arguments', {})
    return arguments.get('verify_tls')
def configure_control(experiment: Experiment, configuration: Configuration,
                      secrets: Secrets, settings: Settings):
    """
    Write the `dummy-key` value into the experiment under `control-value`.

    The value comes from `configuration` when it is non-empty, otherwise
    from `settings` when that one is non-empty, with `"default"` used when
    the key is absent. When both are empty the experiment is not modified.
    """
    # configuration takes precedence over settings
    source = configuration or settings
    if source:
        experiment["control-value"] = source.get("dummy-key", "default")
def get_endpoint_url(settings: Settings,
                     default='https://console.chaosiq.io') -> str:
    """
    Return the `url` argument declared for the `chaosiq-cloud` control
    provider in the settings, or `default` when it is not declared.
    """
    control = settings.get('controls', {}).get('chaosiq-cloud', {})
    arguments = control.get('provider', {}).get('arguments', {})
    return arguments.get('url', default)
def load_experiment(experiment_source: str,
                    settings: Settings = None) -> Experiment:
    """
    Load an experiment from the given source, either a local file or an
    HTTP(s) URL.

    When the endpoint requires authentication, declare it in the settings
    file under the `auths:` section, keyed by domain. For instance:

    ```yaml
    auths:
      mydomain.com:
        type: basic
        value: XYZ
      otherdomain.com:
        type: bearer
        value: UIY
      localhost:8081:
        type: digest
        value: UIY
    ```

    Raises `InvalidSource` when the path does not exist, when the scheme is
    neither `http` nor `https`, or when the response status is not 200.
    """
    # local file: no network access needed
    if os.path.exists(experiment_source):
        return parse_experiment_from_file(experiment_source)

    url = urlparse(experiment_source)
    if not (url.scheme or os.path.exists(url.path)):
        raise InvalidSource('Path "{}" does not exist.'.format(url.path))

    if url.scheme not in ("http", "https"):
        raise InvalidSource(
            "'{}' is not a supported source scheme.".format(url.scheme))

    request_headers = {"Accept": "application/json, application/x-yaml"}
    if settings:
        auths = settings.get("auths", [])
        # credentials are keyed by the exact network location of the URL
        if url.netloc in auths:
            credentials = auths[url.netloc]
            request_headers["Authorization"] = '{} {}'.format(
                credentials["type"], credentials["value"])

    response = requests.get(experiment_source, headers=request_headers)
    if response.status_code != 200:
        raise InvalidSource(
            "Failed to fetch the experiment: {}".format(response.text))

    logger.debug("Fetched experiment: \n{}".format(response.text))
    return parse_experiment_from_http(response)
def get_context(experiment: Experiment, source: str, org: str,
                workspace: str, settings: Settings) -> Context:
    """
    Build the Chaos Hub context from the given parameters.

    The organization and workspace are resolved in the following order
    (first non-empty value wins):

    * the values passed as arguments
    * the "chaoshub" extension block of the experiment, if any
    * the `vendor > chaoshub` section of the settings

    The `hub_url` and `token` are only ever read from that settings
    section, and the experiment identifier only from the extension block.
    The `source` argument is currently not used.
    """
    hub_url = None
    token = None
    experiment_id = None

    extension = get_extension(experiment, "chaoshub")
    if extension:
        org = org or extension.get("organization")
        workspace = workspace or extension.get("workspace")
        experiment_id = extension.get("experiment")

    if settings:
        hub_settings = settings.get('vendor', {}).get('chaoshub', {})
        org = org or hub_settings.get('organization')
        workspace = workspace or hub_settings.get('workspace')
        hub_url = hub_settings.get('hub_url')
        token = hub_settings.get('token')

    return Context(
        org=org,
        workspace=workspace,
        experiment=experiment_id,
        hub_url=hub_url,
        token=token
    )
def initialize_global_controls(settings: Settings):
    """
    Initialize the controls declared under the `controls` entry of the
    settings and store them all as the global controls.

    Each declared control gets its settings key stored under `name`.
    Controls whose provider type is `python` are passed to
    `initialize_control` without configuration nor secrets.
    """
    declared = settings.get("controls", {})
    registered = []

    for name, control in declared.items():
        control['name'] = name
        logger.debug("Initializing global control '{}'".format(name))

        provider = control.get("provider")
        needs_init = provider and provider["type"] == "python"
        if needs_init:
            initialize_control(
                control, configuration=None, secrets=None,
                settings=settings)

        registered.append(control)

    global_controls.set(registered)
def load_global_controls(settings: Settings):
    """
    Import the controls declared in the settings, which are global to all
    experiments, and register them as the global controls.

    This is meant to be called as early as possible, even before the
    experiment is loaded, so that loader controls have a chance to be
    applied. No initialization of the controls takes place here. A control
    with a `python` provider for which `import_control` returns a falsy
    value is not registered.
    """
    loaded = []

    for name, control in settings.get("controls", {}).items():
        control['name'] = name
        logger.debug("Loading global control '{}'".format(name))

        provider = control.get("provider")
        is_python = provider and provider["type"] == "python"
        if is_python and not import_control(control):
            # the module could not be imported, skip this control
            continue

        loaded.append(control)

    set_global_controls(loaded)
def client_session(url: str, organizations: List[Dict[str, str]],
                   verify_tls: bool = True, settings: Settings = None) \
        -> Generator[Session, None, None]:
    """
    Yield an HTTP session whose base URL targets the default organization
    and team, and which sends the authorization header found in the
    settings with each request.

    The token is looked up in the `auths` block of the settings, keyed by
    the host of `url`. When the block exists but has no entry for that
    host, a debug message is logged and no authorization header is set.
    """
    org = get_default_org(organizations)
    team = get_default_team(org)
    org_id, team_id = org["id"], team["id"]
    host = urls.host(url)

    session_headers = {"Accept": "application/json"}

    # read the token from the auths block
    auths = (settings or {}).get('auths')
    if auths:
        host_auth = auths.get(host)
        if host_auth:
            scheme = host_auth.get('type', 'bearer')
            credentials = host_auth.get('value')
            session_headers["Authorization"] = "{} {}".format(
                scheme.capitalize(), credentials)
        else:
            logger.debug(
                "Your settings are missing an authentication declaration for "
                "'{}'. Have you run 'chaos login'?".format(host))

    with Session() as session:
        session.base_url = build_base_url(url, org_id, team_id)
        session.headers.update(session_headers)
        session.verify = verify_tls
        yield session
def get_api_token(settings: Settings,
                  url: str = DEFAULT_PROOFDOCK_API_URL) -> str:
    """
    Return the token declared for the Proofdock API endpoint.

    The token is the `value` of the `auths` entry keyed by the network
    location of `url`. An empty string is returned when there is no such
    entry or when it has no `value`.
    """
    auths = settings.get('auths', {})
    netloc = urlparse(url).netloc
    return auths.get(netloc, {}).get('value', '')
def _get_control(settings: Settings) -> Control: controls = settings.setdefault('controls', {}) return controls.setdefault('proofdock', {})
def _run(self, strategy: Strategy, schedule: Schedule,  # noqa: C901
         experiment: Experiment,
         journal: Journal, configuration: Configuration,
         secrets: Secrets, settings: Settings,
         event_registry: EventHandlerRegistry) -> None:
    """
    Run the experiment from start to finish and return its journal.

    The sequence implemented below is:

    * substitute the experiment title and create the journal when none
      was given
    * initialize the global and experiment controls
    * run the steady-state hypothesis before, during and/or after the
      method, depending on `strategy`, and run the method itself
    * run the rollbacks, unless an exit signal asked to skip them
    * finalize the journal (`end`, `duration`, `status`)
    * always clean up the controls and notify `event_registry` that the
      run finished

    `InterruptExecution`, `KeyboardInterrupt` and `SystemExit` raised
    while running the hypothesis or the method are caught here and mark
    the journal as `interrupted`.

    NOTE(review): the signature is annotated `-> None` although the
    journal is returned.
    """
    experiment["title"] = substitute(
        experiment["title"], configuration, secrets)
    logger.info("Running experiment: {t}".format(t=experiment["title"]))

    started_at = time.time()
    # reuse the journal handed in by the caller, if any
    journal = journal or initialize_run_journal(experiment)
    event_registry.started(experiment, journal)

    control = Control()
    activity_pool, rollback_pool = get_background_pools(experiment)
    hypo_pool = get_hypothesis_pool()
    # set once the method has returned, see below
    continous_hypo_event = threading.Event()

    dry = experiment.get("dry", False)
    if dry:
        logger.warning("Dry mode enabled")

    initialize_global_controls(
        experiment, configuration, secrets, settings)
    initialize_controls(experiment, configuration, secrets)

    logger.info("Steady-state strategy: {}".format(strategy.value))
    rollback_strategy = settings.get("runtime", {}).get(
        "rollbacks", {}).get("strategy", "default")
    logger.info("Rollbacks strategy: {}".format(rollback_strategy))

    exit_gracefully_with_rollbacks = True
    with_ssh = has_steady_state_hypothesis_with_probes(experiment)
    if not with_ssh:
        logger.info(
            "No steady state hypothesis defined. That's ok, just "
            "exploring.")

    try:
        try:
            control.begin(
                "experiment", experiment, experiment, configuration,
                secrets)

            # non-None placeholder so the method still runs when the
            # hypothesis is not executed before it
            state = object()
            if with_ssh and should_run_before_method(strategy):
                state = run_gate_hypothesis(
                    experiment, journal, configuration, secrets,
                    event_registry, dry)

            # a None state from the gate hypothesis skips the method
            if state is not None:
                if with_ssh and should_run_during_method(strategy):
                    run_hypothesis_during_method(
                        hypo_pool, continous_hypo_event, strategy,
                        schedule, experiment, journal, configuration,
                        secrets, event_registry, dry)

                state = run_method(
                    strategy, activity_pool, experiment, journal,
                    configuration, secrets, event_registry, dry)

                # the method is over
                # NOTE(review): presumably tells the hypothesis running
                # during the method to stop; confirm
                continous_hypo_event.set()
                if journal["status"] not in ["interrupted", "aborted"]:
                    if with_ssh and (state is not None) and \
                            should_run_after_method(strategy):
                        run_deviation_validation_hypothesis(
                            experiment, journal, configuration, secrets,
                            event_registry, dry)
        except InterruptExecution as i:
            journal["status"] = "interrupted"
            logger.fatal(str(i))
            event_registry.interrupted(experiment, journal)
        except KeyboardInterrupt:
            journal["status"] = "interrupted"
            logger.warning("Received a termination signal (Ctrl-C)...")
            event_registry.signal_exit()
        except SystemExit as x:
            journal["status"] = "interrupted"
            logger.warning("Received the exit signal: {}".format(x.code))

            # exit code 30 means the rollbacks must not be played
            exit_gracefully_with_rollbacks = x.code != 30
            if not exit_gracefully_with_rollbacks:
                logger.warning("Ignoring rollbacks as per signal")
            event_registry.signal_exit()
        finally:
            hypo_pool.shutdown(wait=True)

        # just in case a signal overrode everything else to tell us not to
        # play them anyway (see the exit.py module)
        if exit_gracefully_with_rollbacks:
            run_rollback(
                rollback_strategy, rollback_pool, experiment, journal,
                configuration, secrets, event_registry, dry)

        journal["end"] = datetime.utcnow().isoformat()
        journal["duration"] = time.time() - started_at

        # the spec only allows these statuses, so if it's anything else
        # we override to "completed"
        if journal["status"] not in (
                "completed", "failed", "aborted", "interrupted"):
            journal["status"] = "completed"

        has_deviated = journal["deviated"]
        # the logged status reports a deviation over the journal status
        status = "deviated" if has_deviated else journal["status"]
        logger.info("Experiment ended with status: {s}".format(s=status))
        if has_deviated:
            logger.info(
                "The steady-state has deviated, a weakness may have been "
                "discovered")

        control.with_state(journal)
        try:
            control.end(
                "experiment", experiment, experiment, configuration,
                secrets)
        except ChaosException:
            # failing to close the controls must not fail the run
            logger.debug("Failed to close controls", exc_info=True)
    finally:
        try:
            cleanup_controls(experiment)
            cleanup_global_controls()
        finally:
            # always runs, even when the controls cleanup raised
            event_registry.finish(journal)

    return journal
def notify(settings: Settings, event: FlowEvent, payload: Any = None,  # noqa: C901
           error: Any = None):
    """
    Go through all the notification channels declared in the settings and
    call them one by one. Only call those matching the current event.

    As this function is blocking, make sure none of your channels take too
    long to run.

    Whenever an error happened in the notification, a debug message is
    logged into the chaostoolkit log for review but this should not impact
    the experiment itself.

    When no settings were provided, no notifications are sent. Equally, if
    the settings do not define a `notifications` entry.

    Here is an example of settings:

    ```yaml
    notifications:
      -
        type: plugin
        module: somepackage.somemodule
        events:
          - init-failed
          - run-failed
      -
        type: http
        url: http://example.com
        headers:
          Authorization: "Bearer token"
      -
        type: http
        url: https://private.com
        verify_tls: false
        forward_event_payload: false
        headers:
          Authorization: "Bearer token"
        events:
          - discovery-completed
          - run-failed
    ```

    In this sample, the first channel will be the `notify` function of the
    `somepackage.somemodule` Python module. The other two notifications
    will be sent over HTTP with the third one not forwarding the event
    payload itself (hence being a GET rather than a POST).

    Notice how the first and third channels take an `events` sequence. That
    list represents the events which those endpoints are interested in. In
    other words, they will only be called for those specific events. The
    second channel will be applied to all events.

    The event payload is a dictionary made of the following entries:

    - `"name"`: the event name
    - `"payload"`: the payload associated to this event (may be None)
    - `"phase"`: which phase this event was raised from, one of
      `"discovery"`, `"init"`, `"run"`, `"validate"` or `"unknown"`
    - `"error"`: only present if an error was passed on to the function
    - `"ts"`: a UTC timestamp of when the event was raised

    Channels whose `type` is neither `http` nor `plugin` are ignored.
    """
    if not settings:
        return

    notification_channels = settings.get("notifications")
    if not notification_channels:
        return

    # the phase is derived from the exact class of the event
    phases = {
        DiscoverFlowEvent: "discovery",
        InitFlowEvent: "init",
        RunFlowEvent: "run",
        ValidateFlowEvent: "validate"
    }

    event_payload = {
        "name": event.value,
        "payload": payload,
        "phase": phases.get(event.__class__, "unknown"),
        # timezone-aware "now" rather than the deprecated utcnow()
        "ts": datetime.now(timezone.utc).timestamp()
    }

    if error:
        event_payload["error"] = error

    for channel in notification_channels:
        # a channel without an `events` list is interested in every event
        events = channel.get("events")
        if events and event.value not in events:
            continue

        channel_type = channel.get("type")
        if channel_type == "http":
            notify_with_http(channel, event_payload)
        elif channel_type == "plugin":
            notify_via_plugin(channel, event_payload)
def verify_tls_certs(settings: Settings) -> bool:
    """
    Return the `verify_tls` argument declared for the `chaosiq-cloud`
    control provider in the settings, or `True` when it is not declared.
    """
    control = settings.get('controls', {}).get('chaosiq-cloud', {})
    arguments = control.get('provider', {}).get('arguments', {})
    return arguments.get('verify_tls', True)
def get_control(settings: Settings) -> Control:
    """
    Return the `chaosiq-cloud` control entry of the settings, creating the
    `controls` section and the entry itself as empty mappings when they
    are missing.

    `None` is returned, and nothing is created, when the settings are
    empty or not provided.
    """
    if settings:
        declared = settings.setdefault('controls', {})
        return declared.setdefault('chaosiq-cloud', {})
    return None
def has_chaosiq_extension_configured(settings: Settings) -> bool:
    """
    Tell whether the settings declare a `chaosiq-cloud` entry, set to
    something other than `None`, under their `controls` section.
    """
    declared = settings.get('controls', {})
    extension = declared.get('chaosiq-cloud')
    return extension is not None