예제 #1
0
def _terminate_now(signum: int, frame: FrameType = None) -> None:
    """
    Handle a delivered signal, interrupting the run when it is SIGTERM.

    Any other signal number is ignored and the function returns `None`.

    :param signum: number of the signal that was received
    :param frame: frame passed along with the signal (not used here)
    :raises InterruptExecution: when `signum` equals `signal.SIGTERM`
    """
    if signum != signal.SIGTERM:
        return

    logger.warning("Caught SIGTERM signal, interrupting experiment now")
    raise InterruptExecution("SIGTERM signal received")
예제 #2
0
def get_or_raise(value: str = "AZURE_PUBLIC_CLOUD") -> azure_cloud.Cloud:
    """
    Resolve a cloud name to the matching `azure_cloud` cloud object.

    The name is stripped of surrounding whitespace and upper-cased before
    it is compared, so the lookup ignores case and padding.

    :param value: name of the Azure cloud; an empty or `None` value selects
        `azure_cloud.AZURE_PUBLIC_CLOUD`
    :return: the `azure_cloud` object matching the given name
    :raises InterruptExecution: when the name matches none of the known
        clouds
    """
    if not value:
        # `Logger.warn` is a deprecated alias, `warning` is the supported API
        logger.warning("Azure cloud not provided. Using"
                       " AZURE_PUBLIC_CLOUD as default")
        return azure_cloud.AZURE_PUBLIC_CLOUD

    cloud = value.strip().upper()

    if cloud == AZURE_PUBLIC_CLOUD:
        return azure_cloud.AZURE_PUBLIC_CLOUD
    if cloud == AZURE_CHINA_CLOUD:
        return azure_cloud.AZURE_CHINA_CLOUD
    if cloud == AZURE_US_GOV_CLOUD:
        return azure_cloud.AZURE_US_GOV_CLOUD
    if cloud == AZURE_GERMAN_CLOUD:
        return azure_cloud.AZURE_GERMAN_CLOUD

    msg = "Invalid Azure cloud '{}'. Please " \
          "provide a proper cloud value".format(cloud)
    logger.info(msg)
    raise InterruptExecution(msg)
예제 #3
0
def run(resource_group: str, compute: dict, parameters: dict,
        client: ComputeManagementClient):
    """
    Start a run-command operation on a compute resource and wait for it.

    The operation is dispatched on the lower-cased `compute['type']`: scale
    set instances go through `client.virtual_machine_scale_set_vms`,
    virtual machines through `client.virtual_machines`.

    :param resource_group: resource group passed to the run-command call
    :param compute: description of the target; must hold `type` and either
        `scale_set` + `instance_id` or `name`
    :param parameters: run-command parameters forwarded as-is
    :param client: compute management client used to start the operation
    :raises InterruptExecution: when the resource type is not supported
    :raises FailedActivity: when starting the operation raises an
        `HttpResponseError`, or when the operation yields no result value
    """
    kind = compute.get('type').lower()

    try:
        if kind == RES_TYPE_VMSS_VM.lower():
            poller = client.virtual_machine_scale_set_vms.begin_run_command(
                resource_group,
                compute['scale_set'],
                compute['instance_id'],
                parameters)
        elif kind == RES_TYPE_VM.lower():
            poller = client.virtual_machines.begin_run_command(
                resource_group, compute['name'], parameters)
        else:
            raise InterruptExecution(
                "Running a command for the unknown resource type '{}'".format(
                    compute.get('type')))
    except HttpResponseError as e:
        raise FailedActivity(e.message)

    # Blocks until the operation has been executed
    outcome = poller.result()
    if not (outcome and outcome.value):
        raise FailedActivity(
            "Operation did not finish properly."
            " You may consider to increase the timeout in the experiment configuration."
        )

    # The first value carries the command's stdout/stderr
    logger.debug(outcome.value[0].message)
예제 #4
0
def is_allowed_to_continue(session: requests.Session,
                           extensions: List[Extension]) -> None:
    """
    Query the safeguards of the current execution and interrupt it when
    they disallow carrying on.

    Nothing is returned. The function comes back normally when the
    execution may continue, which includes the cases where no experiment or
    execution identifier can be read from `extensions` and where the
    safeguards endpoint answers with a status code above 399.

    When the response explicitly sets `"allowed"` to `False`, a deep copy of
    the reported policies is stored in `safeguards_state` under the
    execution identifier (while holding `state_lock`) before raising.

    :param session: HTTP session exposing `base_url` and `get`
    :param extensions: extensions the experiment and execution identifiers
        are looked up from
    :raises InterruptExecution: when the safeguards disallow the execution
    """
    experiment_id = get_experiment_id(extensions)
    if not experiment_id:
        return

    execution_id = get_execution_id(extensions)
    if not execution_id:
        return

    safeguards_url = urls.safeguard(urls.execution(
        urls.experiment(session.base_url, experiment_id=experiment_id),
        execution_id=execution_id))
    r = session.get(safeguards_url)
    if r.status_code > 399:
        return

    state = r.json()
    if state.get("allowed", True) is not False:
        return

    # A disallowing response may omit "policies" (or set it to null); treat
    # that as an empty list so the interruption is still raised rather than
    # a TypeError.
    policies = state.get("policies") or []
    safeguards = "\n".join(p["name"] for p in policies)
    with state_lock:
        safeguards_state[execution_id] = deepcopy(policies)

    raise InterruptExecution(
        "The following safe guards disallow this execution from "
        "continuing:\n{}".format(safeguards)
    )
예제 #5
0
def run(resource_group: str, compute: dict, timeout: int, parameters: dict,
        secrets, configuration):
    """
    Run a command on a compute resource and wait up to `timeout` for it.

    A compute management client is created from `secrets` and
    `configuration`, then the operation is dispatched on the lower-cased
    `compute['type']`.

    :param resource_group: resource group passed to the run-command call
    :param compute: description of the target; must hold `type` and either
        `scale_set` + `instance_id` or `name`
    :param timeout: value handed to the poller's `result` call
    :param parameters: run-command parameters forwarded as-is
    :param secrets: secrets used to initialise the client
    :param configuration: configuration used to initialise the client
    :raises InterruptExecution: when the resource type is not supported
    :raises FailedActivity: when the operation yields no result value
    """
    client = init_compute_management_client(secrets, configuration)
    kind = compute.get('type').lower()

    if kind == RES_TYPE_VMSS_VM.lower():
        poller = client.virtual_machine_scale_set_vms.run_command(
            resource_group,
            compute['scale_set'],
            compute['instance_id'],
            parameters)
    elif kind == RES_TYPE_VM.lower():
        poller = client.virtual_machines.run_command(
            resource_group, compute['name'], parameters)
    else:
        raise InterruptExecution(
            "Trying to run a command for the unknown resource type '{}'"
            .format(compute.get('type')))

    # Blocks until the operation has been executed
    outcome = poller.result(timeout)
    if not (outcome and outcome.value):
        raise FailedActivity("Operation did not finish properly."
                             " You may consider increasing timeout setting.")

    # The first value carries the command's stdout/stderr
    logger.debug(outcome.value[0].message)
예제 #6
0
파일: control.py 프로젝트: chaosiq/chaosiq
def before_activity_control(context: Activity, **kwargs):
    """
    Ask the operator for a Yes/No confirmation before running an activity.

    :param context: the activity; its `name` entry is logged
    :raises InterruptExecution: when the operator does not confirm
    """
    activity_name = context.get("name")
    logger.info("About to execute activity: " + activity_name)

    if not click.confirm('Do you want to continue?'):
        raise InterruptExecution("Experiment manually interrupted")

    logger.info("Continuing: " + activity_name)
예제 #7
0
def __authentication_type(secrets: dict) -> str:
    """
    Tell which authentication type the given secrets allow.

    A non-empty `client_secret` takes precedence over a non-empty
    `access_token`.

    :param secrets: secrets to inspect
    :return: `SERVICE_PRINCIPAL` or `AAD_TOKEN`
    :raises InterruptExecution: when neither entry holds a truthy value
    """
    if secrets.get('client_secret'):
        return SERVICE_PRINCIPAL

    if secrets.get('access_token'):
        return AAD_TOKEN

    raise InterruptExecution("Authentication to Azure requires a"
                             " client secret or an access token")
예제 #8
0
def send_experiment_event(event: str, context: dict, state: dict,
                          settings: Settings):
    """
    Publish an experiment event through a freshly opened client session.

    Any exception raised while opening the session or publishing is logged
    and replaced by an `InterruptExecution`.

    :param event: name of the event to publish
    :param context: context forwarded to `publish_event`
    :param state: state forwarded to `publish_event`
    :param settings: settings used for the session and the publication
    :raises InterruptExecution: when the event could not be published
    """
    try:
        # NOTE(review): the session is opened with TLS verification
        # disabled - confirm this is intended.
        with client_session(verify_tls=False, settings=settings) as session:
            publish_event(event, context, state, settings, session)
    except Exception as error:
        reason = str(error)
        logger.error(
            "Could not update experiment state in the Proofdock "
            "cloud. %s", reason)
        logger.debug(error)
        raise InterruptExecution()
예제 #9
0
def prepare(machine: dict, script: str):
    """
    Pick the run-command identifier and script file name for a machine.

    Linux machines get a shell script (`.sh`), every other OS type a
    PowerShell script (`.ps1`).

    :param machine: machine whose OS type is looked up
    :param script: script name without its file extension
    :return: a tuple of the command id and the script file name
    :raises InterruptExecution: when the script is listed in
        `UNSUPPORTED_WINDOWS_SCRIPTS` and the machine is not a Linux one
    """
    if __get_os_type(machine) == OS_LINUX:
        return 'RunShellScript', "{}.sh".format(script)

    if script in UNSUPPORTED_WINDOWS_SCRIPTS:
        raise InterruptExecution(
            "'{}' is not supported for os '{}'".format(script, OS_WINDOWS))

    return 'RunPowerShellScript', "{}.ps1".format(script)
예제 #10
0
def __get_credentials(creds: dict) -> ServicePrincipalCredentials:
    """
    Build the credentials object matching the provided entries.

    A client secret that is not `None` yields service principal
    credentials; otherwise an access token that is not `None` yields token
    credentials.

    :param creds: mapping holding `azure_client_secret`, `access_token`,
        `azure_client_id` and, for the service principal case,
        `azure_tenant_id` and `azure_cloud`
    :return: the credentials object
    :raises InterruptExecution: when both the client secret and the access
        token are `None`
    """
    if creds['azure_client_secret'] is not None:
        return ServicePrincipalCredentials(
            client_id=creds['azure_client_id'],
            secret=creds['azure_client_secret'],
            tenant=creds['azure_tenant_id'],
            cloud_environment=__get_cloud_env_by_name(creds['azure_cloud']))

    if creds['access_token'] is not None:
        token = {"accessToken": creds['access_token']}
        return AADTokenCredentials(token, creds['azure_client_id'])

    raise InterruptExecution("Authentication to Azure requires a"
                             " client secret or an access token")
예제 #11
0
def fetch_instances(vmss, instance_filter: str,
                    client: ComputeManagementClient) -> List[Dict[str, Any]]:
    """
    Fetch the instances of a scale set and narrow them down with a filter.

    :param vmss: scale set whose instances are fetched
    :param instance_filter: filter expression; an empty value falls back to
        `"sample 1"`
    :param client: compute management client used for fetching
    :return: the filtered instances
    :raises InterruptExecution: when the filter cannot be parsed
    """
    effective_filter = instance_filter or "sample 1"

    try:
        candidates = fetch_all_vmss_instances(vmss, client)
        return kustolight.filter_resources(candidates, effective_filter)
    except jmespath.exceptions.ParseError:
        raise InterruptExecution(
            "'{}' is an invalid query. Please have a look at the documentation."
            .format(effective_filter))
예제 #12
0
def __create(secrets: Dict) -> AADMixin:
    """
    Create the authentication object matching the given secrets.

    The authentication type is derived from the secrets first; the matching
    authenticator is then asked to create the result.

    :param secrets: secrets handed to the authenticator
    :return: whatever the selected authenticator's `create` returns
    :raises InterruptExecution: when the authenticator raises an
        `AuthenticationError`; the message is the `error_description` of
        the inner exception's error response
    """
    kind = __authentication_type(secrets)

    if kind == SERVICE_PRINCIPAL:
        authenticator = ServicePrincipalAuth()
    elif kind == AAD_TOKEN:
        authenticator = TokenAuth()

    try:
        return authenticator.create(secrets)
    except AuthenticationError as e:
        description = e.inner_exception.error_response.get(
            'error_description')
        raise InterruptExecution(description)
예제 #13
0
def prepare(compute: dict, script: str):
    """
    Select the run-command identifier and load the matching script.

    Linux targets use the shell variant (`.sh`), every other OS type the
    PowerShell variant (`.ps1`). The script is read from the `../scripts`
    directory relative to this module.

    :param compute: compute resource whose OS type is looked up
    :param script: script name without its file extension
    :return: a tuple of the command id and the script content
    :raises InterruptExecution: when the script is listed in
        `UNSUPPORTED_WINDOWS_SCRIPTS` and the target is not a Linux one
    """
    if __get_os_type(compute) == OS_LINUX:
        command_id, script_name = 'RunShellScript', "{}.sh".format(script)
    elif script in UNSUPPORTED_WINDOWS_SCRIPTS:
        raise InterruptExecution("'{}' is not supported for os '{}'"
                                 .format(script, OS_WINDOWS))
    else:
        command_id = 'RunPowerShellScript'
        script_name = "{}.ps1".format(script)

    scripts_dir = os.path.dirname(__file__)
    script_path = os.path.join(scripts_dir, "../scripts", script_name)
    with open(script_path) as handle:
        return command_id, handle.read()
예제 #14
0
def __get_os_type(compute):
    """
    Read the lower-cased OS type of a compute resource.

    Scale set instances expose it under
    `storage_profile.os_disk.os_type`, virtual machines under
    `properties.storageProfile.osDisk.osType`.

    :param compute: compute resource holding a `type` entry
    :return: the lower-cased OS type, one of `OS_LINUX` or `OS_WINDOWS`
    :raises InterruptExecution: when the resource type is not supported
    :raises FailedActivity: when the OS type is neither Linux nor Windows
    """
    kind = compute['type'].lower()

    if kind == RES_TYPE_VMSS_VM.lower():
        raw_os_type = compute['storage_profile']['os_disk']['os_type']
    elif kind == RES_TYPE_VM.lower():
        raw_os_type = \
            compute['properties']['storageProfile']['osDisk']['osType']
    else:
        raise InterruptExecution(
            "Trying to run a command for the unknown resource type '{}'"
            .format(compute.get('type')))

    normalized = raw_os_type.lower()
    if normalized in (OS_LINUX, OS_WINDOWS):
        return normalized

    raise FailedActivity("Unknown OS Type: %s" % raw_os_type)
def fetch_resources(input_query: str, resource_type: str, secrets: Secrets,
                    configuration: Configuration):
    """
    Query the resource graph and return the matching resources as dicts.

    :param input_query: user-provided part of the query
    :param resource_type: resource type the query is built for
    :param secrets: secrets used to initialise the resource graph client
    :param configuration: configuration used to build the query request
    :return: the resources converted with `__to_dicts`
    :raises InterruptExecution: when the client raises an
        `HttpResponseError`; the message is the error code followed by its
        details, or the exception's own message when no error code is
        available
    """
    # prepare query
    _query = __query_from(resource_type, input_query)
    _query_request = __query_request_from(_query, configuration)

    # prepare resource graph client
    try:
        client = init_resource_graph_client(secrets)
        resources = client.resources(_query_request)
    except HttpResponseError as e:
        # The structured error (or its code) may be missing; do not let an
        # AttributeError/TypeError hide the original failure in that case.
        error = e.error
        if error is None or not error.code:
            raise InterruptExecution(e.message) from e

        parts = [str(error.code)]
        parts.extend(str(d) for d in (error.details or []))
        raise InterruptExecution(": ".join(parts)) from e

    # prepare results
    return __to_dicts(resources.data)
예제 #16
0
    def started(self, experiment: Experiment, journal: Journal) -> None:
        """
        Tell the ChaosIQ service that the verification has started.

        An execution is initialized first; its identifier is then posted,
        together with the journal and the `started` status, to the
        verification run path. On success the returned run identifier is
        kept on `self.run_id` and set on the experiment.

        :raises InterruptExecution: when initializing the execution does
            not answer with a 200 or 201 status code
        """
        self._start_time = datetime.now()
        base_endpoint, verify_tls, orgs = get_call_context(self.settings)
        with client_session(base_endpoint, orgs, verify_tls,
                            self.settings) as session:
            init_response = initialize_execution(session, experiment, journal)
            if init_response.status_code not in (200, 201):
                raise InterruptExecution(
                    "It is possible you are trying to run a verification "
                    "against a team that is not the active team of the `chaos` "  # noqa: E501
                    "session. Please run `chaos team` to switch active team "
                    "then try again. If the problem persists or the team is "
                    "the correct one, please contact the ChaosIQ support.")
            execution_id = init_response.json()["id"]

        r = self._make_call(
            "POST",
            self.verification_run_path,
            json={
                "journal": journal,
                "status": "started",
                "experiment_id": get_experiment_id(experiment),
                "execution_id": execution_id
            })
        error = self.get_error(r)
        if error or (r is None):
            logger.error(
                "Failed to notify verification run was started: {}".format(
                    error))
            return

        self.run_id = r.json()["id"]
        if not self.run_id:
            return

        logger.debug("Verification run '{}' started".format(self.run_id))
        set_run_id(self.run_id, experiment)
예제 #17
0
def fetch_resources(user_query: str, resource_type: str, secrets: Secrets,
                    configuration: Configuration):
    """
    Query the resource graph and return the matching resources as dicts.

    :param user_query: user-provided filter for the query
    :param resource_type: resource type the query is built for
    :param secrets: secrets used to initialise the client
    :param configuration: configuration used to build the query request
    :return: the non-empty list of resources converted with `__to_dicts`
    :raises InterruptExecution: when the client raises an
        `HttpResponseError`
    :raises FailedActivity: when no resource matches
    """
    # build the request
    request = query.create_request(resource_type, user_query, configuration)

    # run it against the resource graph
    try:
        graph_client = init_client(secrets)
        response = graph_client.resources(request)
    except HttpResponseError as e:
        raise InterruptExecution(e.message)

    # convert and validate the outcome
    found = __to_dicts(response.data)
    if found:
        return found

    raise FailedActivity(
        "Could not find resources of type '{}' and filter '{}'".format(
            resource_type, user_query))
예제 #18
0
def before_experiment_control(context: Experiment,
                              configuration: Configuration = None,
                              secrets: Secrets = None,
                              settings: Settings = None,
                              **kwargs):
    """
    Control hook running before the experiment is executed.

    Unless uploading is turned off in the settings, an experiment run is
    created remotely, its id and creation time are stored in the run
    context under `execution`, and a `before-experiment` event is sent.

    :raises InterruptExecution: when the experiment run could not be
        created
    """
    if no_upload(settings):
        return

    try:
        logger.info('Creating experiment run in Proofdock...')
        # NOTE(review): the session is opened with TLS verification
        # disabled - confirm this is intended.
        with client_session(verify_tls=False, settings=settings) as session:
            execution = push_execution(settings, session)

        execution_id = execution.get('id')
        add_to_run_context(settings, 'execution', {
            'id': execution_id,
            'creation_time': execution.get('creation_time')
        })
        logger.info(
            "New experiment run with id: '{}' created.".format(execution_id))
    except Exception as error:
        logger.error('Could not create experiment run in Proofdock cloud. %s',
                     str(error))
        logger.debug(error)
        raise InterruptExecution()

    send_experiment_event(
        event='before-experiment', context=context, state=None,
        settings=settings)
예제 #19
0
def prepare(compute: dict, script_id: str):
    """Select the run command and load the script to execute.

    Linux targets use the shell variant (`.sh`), every other OS type the
    PowerShell variant (`.ps1`). The script is read from the `../scripts`
    directory relative to this module.

    :param compute: The instance to be attacked.
    :param script_id: The script's filename without its extension.
    :return: A tuple of the command id and the script content.
    :raises InterruptExecution: When the script is listed in
        `UNSUPPORTED_WINDOWS_SCRIPTS` and the target is not a Linux one.
    """
    if __get_os_type(compute) == OS_LINUX:
        command_id, script_name = 'RunShellScript', "{}.sh".format(script_id)
    elif script_id in UNSUPPORTED_WINDOWS_SCRIPTS:
        raise InterruptExecution(
            "'{}' is not supported for os '{}'".format(
                script_id, OS_WINDOWS))
    else:
        command_id = 'RunPowerShellScript'
        script_name = "{}.ps1".format(script_id)

    module_dir = os.path.dirname(__file__)
    script_path = os.path.join(module_dir, "../scripts", script_name)
    with open(script_path) as handle:
        return command_id, handle.read()
예제 #20
0
def auth(secrets: Dict) -> ClientSecretCredential:
    """
    Build an Azure `ClientSecretCredential` from the secrets and yield it.

    The credential is created from the `tenant_id`, `client_id` and
    `client_secret` entries. Its authority is the hostname of the
    `endpoints.active_directory` URL of the object stored under the `cloud`
    entry, so that entry has to be present.

    Expected shape of the secrets:
    ```python
    {
        "client_id": "AZURE_CLIENT_ID",
        "client_secret": "AZURE_CLIENT_SECRET",
        "tenant_id": "AZURE_TENANT_ID",
        "cloud": <object exposing `endpoints.active_directory`>
    }
    ```

    Intended usage:
    ```python
    with auth(secrets) as cred:
        subscription_id = configuration.get("subscription_id")
        resource_client = ResourceManagementClient(cred, subscription_id)
        compute_client = ComputeManagementClient(cred, subscription_id)
    ```

    NOTE(review): as written in this block the function is a plain
    generator; the `with` form above needs it to be wrapped as a context
    manager (e.g. `contextlib.contextmanager`), presumably applied outside
    this block - confirm.

    :param secrets: mapping holding the entries described above
    :raises InterruptExecution: when building the credential raises a
        `ValueError`
    """
    try:
        directory_url = secrets.get('cloud').endpoints.active_directory
        authority_host = urlparse(directory_url).hostname
        credential = ClientSecretCredential(
            tenant_id=secrets.get('tenant_id'),
            client_id=secrets.get('client_id'),
            client_secret=secrets.get('client_secret'),
            authority=authority_host)
    except ValueError as e:
        raise InterruptExecution(str(e))

    yield credential
예제 #21
0
def before_loading_experiment_control(context: str):
    """
    Control hook that always interrupts the execution.

    :param context: value echoed in the error message
    :raises InterruptExecution: always
    """
    reason = f"failed to load: {context}"
    raise InterruptExecution(reason)
예제 #22
0
def after_activity_control(**kwargs):
    """
    Control hook that always interrupts the execution.

    :raises InterruptExecution: always, without a message
    """
    interruption = InterruptExecution()
    raise interruption
예제 #23
0
def interrupt_me():
    """
    Interrupt the execution unconditionally.

    :raises InterruptExecution: always, without a message
    """
    interruption = InterruptExecution()
    raise interruption
예제 #24
0
def before_activity_control(context: Activity, **kwargs):
    """
    Control hook that interrupts the execution before any activity.

    :param context: the activity about to run (not inspected)
    :raises InterruptExecution: always
    """
    reason = "let's blow this up"
    raise InterruptExecution(reason)
예제 #25
0
def force_interrupting_experiment():
    """
    Interrupt the experiment unconditionally.

    :raises InterruptExecution: always, without a message
    """
    interruption = InterruptExecution()
    raise interruption
예제 #26
0
def aws_client(resource_name: str, configuration: Configuration = None,
               secrets: Secrets = None):
    """
    Create a boto3 client for the given resource.

    The region is read from the `aws_region` configuration key and, when
    that key is not set, from the `AWS_REGION` then `AWS_DEFAULT_REGION`
    environment variables. An `InterruptExecution` is raised when no region
    can be found.

    You may pass `aws_profile_name` in the `configuration` object so that
    the matching profile is used when the default boto3 session is set up.
    In that case, make sure your local `~/.aws/credentials` config is
    properly setup, as per
    https://boto3.readthedocs.io/en/latest/guide/configuration.html#aws-config-file

    When `aws_assume_role_arn` is set in the `configuration` object, that
    role is assumed through STS (with the session name taken from
    `aws_assume_role_session_name`, or `ChaosToolkit` when missing) and the
    returned client uses the resulting temporary credentials. See also
    https://boto3.readthedocs.io/en/latest/guide/configuration.html#assume-role-provider
    """  # noqa: E501
    configuration = configuration or {}
    profile = configuration.get("aws_profile_name")
    role_arn = configuration.get("aws_assume_role_arn")
    session_params = get_credentials(secrets)

    region = configuration.get("aws_region")
    if not region:
        logger.debug(
            "The configuration key `aws_region` is not set, looking in the "
            "environment instead for `AWS_REGION` or `AWS_DEFAULT_REGION`")
        region = os.getenv("AWS_REGION", os.getenv("AWS_DEFAULT_REGION"))
        if not region:
            raise InterruptExecution("AWS requires a region to be set!")

    # From here on a region is always available
    logger.debug("Using AWS region '{}'".format(region))
    session_params["region_name"] = region

    if boto3.DEFAULT_SESSION is None:
        # we must create our own session so that we can populate the profile
        # name when it is provided. Only create the default session once.
        boto3.setup_default_session(profile_name=profile, **session_params)

    if not role_arn:
        logger.debug(
            "Client will be using profile '{}' from boto3 session".format(
                profile or "default"))
        return boto3.client(resource_name, **session_params)

    logger.debug(
        "Fetching credentials dynamically assuming role '{}'".format(
            role_arn))

    role_session_name = configuration.get("aws_assume_role_session_name")
    if not role_session_name:
        role_session_name = "ChaosToolkit"
        logger.debug(
            "You are missing the `aws_assume_role_session_name` "
            "configuration key. A unique one was generated: '{}'".format(
                role_session_name))

    sts = boto3.client('sts', **session_params)
    response = sts.assume_role(
        RoleArn=role_arn, RoleSessionName=role_session_name)
    creds = response['Credentials']
    logger.debug(
        "Temporary credentials will expire on {}".format(
            creds["Expiration"].isoformat()))

    return boto3.client(
        resource_name,
        aws_access_key_id=creds['AccessKeyId'],
        aws_secret_access_key=creds['SecretAccessKey'],
        aws_session_token=creds['SessionToken'],
        region_name=region)
예제 #27
0
def before_loading_experiment_control(context: str):
    """
    Control hook that always interrupts the execution.

    :param context: value echoed in the error message
    :raises InterruptExecution: always
    """
    template = "failed to load: {}"
    raise InterruptExecution(template.format(context))
예제 #28
0
 def create(self, secrets: Secrets) -> AADMixin:
     """
     Placeholder that always interrupts the execution.

     :param secrets: secrets (not used here)
     :raises InterruptExecution: always, with the message "Not implemented"
     """
     reason = "Not implemented"
     raise InterruptExecution(reason)
예제 #29
0
 def handler(signum, frame):
     """
     Signal handler that always interrupts the execution.

     :param signum: received signal number (not used here)
     :param frame: frame passed with the signal (not used here)
     :raises InterruptExecution: always
     """
     reason = "boom"
     raise InterruptExecution(reason)
def before_activity_control(context: Activity, target_activity_name: str, **kwargs):
    """
    Control hook that interrupts the execution before one given activity.

    Activities whose `name` entry differs from `target_activity_name` are
    left alone and the hook returns `None`.

    :param context: the activity about to run
    :param target_activity_name: name of the activity to interrupt on
    :raises InterruptExecution: when the activity name matches
    """
    if context.get("name") != target_activity_name:
        return

    raise InterruptExecution("let's blow this up")