Example #1
def upgrade_tiller(namespace):
    """
    Updates the version of Tiller in a namespace to match the currently configured Helm client.
    An exception will be thrown if Tiller is not present.
    Args:
        namespace: The namespace of the Tiller deployment
    """
    # Check if Tiller is already at the correct version
    (rc, output) = baseutils.exe_cmd(
        '{helm} version --tiller-namespace {namespace} --short'.format(
            helm=helm_binary, namespace=baseutils.shell_escape(namespace)))
    output = output.strip().splitlines()
    client_version = output[0].strip().split()[1]
    tiller_version = output[1].strip().split()[1]
    if client_version != tiller_version:
        deployment = k8s.get('deployment',
                             namespace=namespace,
                             name='tiller-deploy')
        pod_spec = deployment['spec']['template']['spec']
        service_account_name = pod_spec['serviceAccountName']
        container_spec = pod_spec['containers'][0]
        override = None
        if 'command' in container_spec:
            override = '"spec.template.spec.containers[0].command"="{{{{{command}}}}}"'.format(
                command=','.join(container_spec['command']))
        baseutils.exe_cmd(
            '{helm} init --history-max 20 --tiller-namespace {namespace} --service-account {service_account_name} {override} --upgrade'
            .format(helm=helm_binary,
                    namespace=baseutils.shell_escape(namespace),
                    service_account_name=baseutils.shell_escape(
                        service_account_name),
                    override='--override {override}'.format(
                        override=baseutils.shell_escape(override))
                    if override else ''))
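A minimal, self-contained sketch of the version comparison above, using a hypothetical 'helm version --short' output string:

# Hypothetical output of: helm version --tiller-namespace <namespace> --short
sample_output = 'Client: v2.11.0+g2e55dbe\nServer: v2.9.1+g20adb27'
lines = sample_output.strip().splitlines()
client_version = lines[0].strip().split()[1]  # 'v2.11.0+g2e55dbe'
tiller_version = lines[1].strip().split()[1]  # 'v2.9.1+g20adb27'
if client_version != tiller_version:
    print('Tiller upgrade required: {old} -> {new}'.format(old=tiller_version, new=client_version))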
Example #2
def import_certificates(release_name):
    """
    Triggers the import of certificates from Certificate Manager into the Kubernetes cluster if the correct annotations are found on ingress resources.
    Specifically, ingress resources may have an annotation with key "p2paas-certificate" whose value is the certificate's CRN in Certificate Manager.
    The secret created in Kubernetes will be named "{namespace-of-ingress}.{name-of-ingress}".
    Args:
        release_name: The name of the release to check for certificate annotations
    """
    cluster_info = None
    manifest = helm.get_manifest(release_name)
    for resource in manifest:
        kind = resource['kind'].lower()
        if kind == 'ingress':
            annotations = resource['metadata'].get('annotations', {})
            certificate_crn = annotations.get('p2paas-certificate')
            if certificate_crn:
                if not cluster_info:
                    cluster_info = json.loads(
                        k8s.get('configmap', 'kube-system',
                                'cluster-info')['data']['cluster-config.json'])
                certificate_secret_name = '{namespace}.{name}'.format(
                    namespace=resource['metadata'].get('namespace', 'default'),
                    name=resource['metadata']['name'])
                logger.info(
                    'Importing certificate "{crn}" as secret "{secret}"'.
                    format(crn=certificate_crn,
                           secret=certificate_secret_name))
                ibmcloud.ks_alb_cert_deploy(cluster_info['cluster_id'],
                                            certificate_secret_name,
                                            certificate_crn)
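A standalone sketch of the annotation lookup and secret naming above, applied to a hypothetical ingress resource (the CRN value is made up):

sample_ingress = {
    'kind': 'Ingress',
    'metadata': {
        'name': 'my-app',
        'namespace': 'web',
        'annotations': {'p2paas-certificate': 'crn:v1:bluemix:public:cloudcerts:us-south:a/123:456::'}
    }
}
annotations = sample_ingress['metadata'].get('annotations', {})
certificate_crn = annotations.get('p2paas-certificate')
if certificate_crn:
    certificate_secret_name = '{namespace}.{name}'.format(
        namespace=sample_ingress['metadata'].get('namespace', 'default'),
        name=sample_ingress['metadata']['name'])
    print(certificate_secret_name)  # web.my-app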
Example #3
File: sos.py Project: sakaul88/dd
def _get_current_iks_ovpn_config_name(cluster_name):
    """
    Retrieves the name of the ovpn config currently associated with an IKS cluster
    Args:
        cluster_name: The name of the cluster to retrieve the ovpn name for
    Returns: The name of the config if a reserved config exists, otherwise None is returned
    """
    iks_ovpn_config_name = None
    if k8s.exists('secret', 'ibm-services-system', 'sos-vpn-secret'):
        vpn_secret = k8s.get('secret', 'ibm-services-system', 'sos-vpn-secret')
        for key in vpn_secret['data']:
            if key.endswith('.ovpn'):
                iks_ovpn_config_name = key
                with baseutils.local_lock(
                        lock_name=iks_ovpn_reservation_lock_name):
                    # Ensure the reservation system is in-sync with the current state of the IKS cluster
                    vault_reservation_path = '{parent}/reservations/{config_name}'.format(
                        parent=vault_iks_ovpn_path,
                        config_name=iks_ovpn_config_name)
                    current_reservation_owner = vault.read(
                        vault_reservation_path, property='cluster')
                    if current_reservation_owner:
                        if current_reservation_owner != cluster_name:
                            raise Exception(
                                'Cluster is using an ovpn config reserved by a different cluster'
                            )
                    else:
                        vault.write(vault_reservation_path,
                                    {'cluster': cluster_name})
                break
    return iks_ovpn_config_name
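A standalone sketch of how the ovpn config name and its Vault reservation path are derived above, using hypothetical secret data and a hypothetical parent path:

sample_secret_data = {'ca.crt': '...', 'cluster-123.ovpn': '...'}
vault_iks_ovpn_path = 'secret/iks/ovpn'  # hypothetical value of the module-level parent path
config_name = next((key for key in sample_secret_data if key.endswith('.ovpn')), None)
if config_name:
    vault_reservation_path = '{parent}/reservations/{config_name}'.format(
        parent=vault_iks_ovpn_path, config_name=config_name)
    print(vault_reservation_path)  # secret/iks/ovpn/reservations/cluster-123.ovpn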
Example #4
def set_cluster_autoscaler(enabled, worker_pool_names=None, new_worker_pool_names=None):
    """
    Enables or disables the cluster autoscaler in a cluster.
    This will neither install nor uninstall the autoscaler, merely update the configuration of the autoscaler if present.
    If the autoscaler is installed but a given worker pool is not already present in the autoscaler config, it will not be added.
    Args:
        enabled: Whether to enable or disable the cluster autoscaler. True = enable, False = disable
        worker_pool_names: If present, only the passed list of pools will be enabled/disabled (Optional, default: all worker pools currently configured)
        new_worker_pool_names: If worker_pool_names is also specified, element n in worker_pool_names will be renamed to element n in new_worker_pool_names.
                               Each element in worker_pool_names must have a corresponding entry in new_worker_pool_names and at the same index (Optional)
    Returns: A list of the worker pools that had their configuration changed
    """
    modified_pools = []
    if k8s.exists('configmap', 'kube-system', 'iks-ca-configmap'):
        config_map = k8s.get('configmap', 'kube-system', 'iks-ca-configmap')
        worker_pools_config = json.loads(config_map['data']['workerPoolsConfig.json'])
        rename_worker_pools = new_worker_pool_names and worker_pool_names and len(new_worker_pool_names) == len(worker_pool_names)
        for pool_config in worker_pools_config:
            if not worker_pool_names or pool_config['name'] in worker_pool_names:
                if rename_worker_pools:
                    pool_config['name'] = new_worker_pool_names[worker_pool_names.index(pool_config['name'])]
                    pool_config['enabled'] = enabled
                    modified_pools.append(pool_config['name'])
                elif pool_config['enabled'] != enabled:
                    pool_config['enabled'] = enabled
                    modified_pools.append(pool_config['name'])
        if modified_pools:
            config_map['data']['workerPoolsConfig.json'] = json.dumps(worker_pools_config, ensure_ascii=False)  # TODO: Remove ensure_ascii when migration to py3 is complete
            k8s.apply(config_map)
    else:
        logger.info('Cluster autoscaler is not present')
    return modified_pools
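A self-contained sketch of the enable/rename logic above, applied to a hypothetical workerPoolsConfig.json payload (the fields beyond "name" and "enabled" are illustrative):

import json

worker_pools_config = json.loads(
    '[{"name": "default", "minSize": 2, "maxSize": 5, "enabled": false},'
    ' {"name": "edge", "minSize": 1, "maxSize": 3, "enabled": false}]')
worker_pool_names = ['default']
new_worker_pool_names = ['compute']
rename_worker_pools = new_worker_pool_names and worker_pool_names and len(new_worker_pool_names) == len(worker_pool_names)
for pool_config in worker_pools_config:
    if not worker_pool_names or pool_config['name'] in worker_pool_names:
        if rename_worker_pools:
            pool_config['name'] = new_worker_pool_names[worker_pool_names.index(pool_config['name'])]
        pool_config['enabled'] = True
print(json.dumps(worker_pools_config))  # only "default" is renamed to "compute" and enabled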
Example #5
def wait_for_node_namespace_pods(node, namespace):
    """
    Waits until the pods in a particular namespace on a specified node are Running.
    This can be useful when performing node maintenance where some namespaces do not utilise PodDisruptionBudgets.
    This function continually polls the status of pods in the namespace assigned to the given node and returns when they are all Running or Completed.
    If the node is scaled out during the process, the function will return without error.
    Args:
        node: The name of the node to wait on. This is generally the IP of the node
        namespace: The namespace to poll
    """
    pods_ready = False
    while not pods_ready:
        pods_ready = True
        time.sleep(15)
        pods = k8s.get('pod', namespace=namespace)
        for pod in pods:
            pod_status = pod['status']
            if pod_status.get('hostIP') == node:  # We are not checking pods with an empty hostIP as we can't tell if it's actually waiting for the current host
                # Pod is located on the host we are monitoring
                pod_phase = pod_status['phase']
                if pod_phase != 'Succeeded':  # A succeeded pod is successfully complete
                    if pod_phase == 'Running':
                        for container_status in pod_status['containerStatuses']:
                            if not container_status['ready']:
                                pods_ready = False
                                break
                    else:  # A non-running, non-succeeded pod is not ready
                        pods_ready = False
                        break
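A standalone sketch of the per-pod readiness decision above, evaluated against hypothetical pod status dicts:

node = '10.0.0.1'
sample_pods = [
    {'status': {'hostIP': '10.0.0.1', 'phase': 'Running',
                'containerStatuses': [{'ready': True}, {'ready': False}]}},
    {'status': {'hostIP': '10.0.0.2', 'phase': 'Pending'}}  # different node, ignored
]
pods_ready = True
for pod in sample_pods:
    pod_status = pod['status']
    if pod_status.get('hostIP') == node and pod_status['phase'] != 'Succeeded':
        if pod_status['phase'] != 'Running' or not all(
                container_status['ready'] for container_status in pod_status['containerStatuses']):
            pods_ready = False
            break
print(pods_ready)  # False: a container on the monitored node is not ready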
Example #6
File: helm.py Project: sakaul88/dd
def get_tiller_version():
    """
    Retrieves the current version of Tiller in the environment.
    If Tiller is not present, an exception will be triggered by the underlying Kubernetes APIs.
    Returns: The installed version of Tiller
    """
    tiller_deployment = k8s.get('deployment', namespace='kube-system', name='tiller-deploy')
    return tiller_deployment['spec']['template']['spec']['containers'][0]['image'].split(':')[-1]
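The return value above is simply the tag portion of the container image reference; a trivial sketch with a hypothetical image string:

image = 'gcr.io/kubernetes-helm/tiller:v2.11.0'  # hypothetical image reference
print(image.split(':')[-1])  # v2.11.0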
Example #7
File: helm.py Project: sakaul88/dd
def _check_for_resource_pod_errors(kind, namespace, name):
    """
    Checks if pods from a resource enter an error state.
    If an error state is detected, an exception is raised.
    Args:
        kind: The kind of the resource owning the pods
        namespace: The namespace of the resource owning the pods
        name: The name of the resource owning the pods
    """
    live_resource = k8s.get(kind, namespace=namespace, name=name)
    # Evaluate pods specific to the new converging state only. Do not include pods that may have been in a bad state pre-upgrade
    if kind == 'deployment':
        replica_sets = k8s.get('replicaset', namespace=namespace, labels=live_resource['spec']['selector']['matchLabels'])
        for replica_set in replica_sets:
            if live_resource['metadata']['annotations']['deployment.kubernetes.io/revision'] == replica_set['metadata']['annotations']['deployment.kubernetes.io/revision']:
                match_labels = replica_set['spec']['selector']['matchLabels']
                break
    elif kind == 'daemonset':
        match_labels = live_resource['spec']['selector']['matchLabels']
        match_labels['pod-template-generation'] = live_resource['metadata']['generation']
    elif kind == 'statefulset':
        match_labels = live_resource['spec']['selector']['matchLabels']
        match_labels['controller-revision-hash'] = live_resource['status']['updateRevision']
    pods = k8s.get('pod', namespace=namespace, labels=match_labels)
    for pod in pods:
        pod_name = pod['metadata']['name']
        for container_status in pod['status'].get('containerStatuses', []):
            wait_reason = container_status['state'].get('waiting', {}).get('reason')
            if wait_reason and (wait_reason == 'CrashLoopBackOff' or wait_reason == 'ErrImagePull' or wait_reason == 'InvalidImageName' or wait_reason == 'RunContainerError' or
                                wait_reason == 'ImagePullBackOff' or wait_reason.endswith(' not found') or wait_reason.startswith('Couldn\'t find ')):
                # A failed container has been identified. Log useful information before raising exception
                k8s.describe('pod', namespace=namespace, name=pod_name)
                k8s.logs(pod_name, namespace=namespace, container=container_status['name'])
                raise Exception('The pod {pod} has entered the failed waiting state "{state}" during chart upgrade. {message}'.format(
                    pod=pod_name, state=wait_reason, message=container_status['state']['waiting'].get('message', '')))
            if container_status.get('restartCount', 0) > 0:
                k8s.describe('pod', namespace=namespace, name=pod_name)
                k8s.logs(pod_name, namespace=namespace, container=container_status['name'])
                raise Exception('The pod {pod} has restarted (failed) during chart upgrade'.format(pod=pod_name))
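A self-contained sketch of the failed-waiting-state check above, applied to a hypothetical container status entry; collecting the literal reasons into a tuple is one possible simplification of the long "or" chain:

failed_reasons = ('CrashLoopBackOff', 'ErrImagePull', 'InvalidImageName', 'RunContainerError', 'ImagePullBackOff')
container_status = {
    'name': 'app',
    'restartCount': 0,
    'state': {'waiting': {'reason': 'ImagePullBackOff', 'message': 'Back-off pulling image'}}
}
wait_reason = container_status['state'].get('waiting', {}).get('reason')
if wait_reason and (wait_reason in failed_reasons
                    or wait_reason.endswith(' not found')
                    or wait_reason.startswith('Couldn\'t find ')):
    print('Failed waiting state detected: {reason}'.format(reason=wait_reason))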
Example #8
File: helm.py Project: sakaul88/dd
def wait_for_release_resources(release):
    """
    Waits until a release's resources are all initialised.
    For resources with replicas, this means all pods must be started and passing their probes.
    For LoadBalancer services, this means waiting until the ingresses are initialised.
    If this is an upgrade of a release with replicas, the pre-upgrade state of the pods is required to know when they have been replaced.
    Pods will only be waited upon if their Deployment or StatefulSet is configured for RollingUpdate and either their image tag or env vars have been updated.
    Args:
        release: The name of the release to wait on
    """
    manifest = get_manifest(release)
    time.sleep(2)  # Wait to ensure the new replica values have been rolled out from the manifest to the deployed resources
    max_replicas = get_max_replicas_count_in_manifest(manifest)
    timeout_value = max(900, (max_replicas * 300) + 60)  # Minimum timeout value is 900 seconds; otherwise, base it on the max number of replicas of any resource
    logger.info('Timeout period set to {timeout} seconds for release "{release}"'.format(timeout=timeout_value, release=release))
    with baseutils.timeout(seconds=timeout_value):
        logger.info('Waiting for resources in release "{release}" to enter ready state'.format(release=release))
        for resource in manifest:
            kind = resource['kind'].lower()
            name = resource['metadata']['name']
            namespace = resource['metadata'].get('namespace')
            if kind in ['deployment', 'daemonset', 'statefulset']:
                logger.info('Tracking rollout status of "{kind}" "{name}"'.format(kind=kind, name=name))
                rollout_status = ''
                try:
                    while ('rolling update complete' not in rollout_status
                           and 'successfully rolled out' not in rollout_status
                           and 'roll out complete' not in rollout_status):
                        time.sleep(5)
                        rollout_status = k8s.rollout_status(kind, name, namespace=namespace)
                        _check_for_resource_pod_errors(kind, namespace, name)
                    logger.info('Pods for "{kind}" "{name}" have been rolled out'.format(kind=kind, name=name))
                except Exception as e:
                    if 'Status is available only for RollingUpdate strategy type' in str(e):
                        logger.info('"{kind}" "{name}" is not configured for rolling updates'.format(kind=kind, name=name))
                    else:
                        raise
            elif kind == 'service' and resource['spec'].get('type') == 'LoadBalancer':
                resource_ready = False
                while not resource_ready:
                    time.sleep(5)
                    live_resource = k8s.get(kind, namespace=namespace, name=name)
                    ingress = live_resource['status']['loadBalancer'].get('ingress')
                    if ingress and 'ip' in ingress[0] and 'clusterIP' in live_resource['spec']:
                        resource_ready = True
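A short sketch of the timeout derivation above: the floor is 900 seconds, otherwise 300 seconds per replica plus a 60-second buffer, so 2 replicas keep the 900-second floor while 4 replicas yield 1260 seconds:

def compute_timeout(max_replicas):
    # Minimum of 900 seconds; otherwise 300 seconds per replica plus a 60-second buffer
    return max(900, (max_replicas * 300) + 60)

print(compute_timeout(2))  # 900
print(compute_timeout(4))  # 1260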
Example #9
 def test_get(self, mock_exe_cmd):
     mock_exe_cmd.return_value = (0, '{"items": [], "kind": "List"}')
     self.assertEqual([], k8s.get('clusterrolebinding'))
     self.assertEqual([], k8s.get('deployment', namespace='kube-config'))
     self.assertEqual([], k8s.get('pod', namespace='all'))
     mock_exe_cmd.return_value = (0, '{}')
     self.assertEqual({}, k8s.get('serviceaccount', name='name'))
     self.assertEqual({},
                      k8s.get('pod', name='name', namespace='kube-config'))
     self.assertEqual({},
                      k8s.get('node', name='name', labels='dedicated=edge'))
     self.assertEqual({},
                      k8s.get('node',
                              name='name',
                              labels={
                                  'dedicated': 'edge',
                                  'key2': 'label2'
                              }))
     mock_exe_cmd.return_value = (0, pod_output)
     self.assertEqual(json.loads(pod_output),
                      k8s.get('Pod', name='tiller-deploy-5c477df6bf-rsjhp'))
Example #10
def set_recovery_tool(enabled):
    """
    Sets ibm-worker-recovery tool "Enabled" attribute, which governs whether it monitors nodes for failures.
    This can be used to disable the tool when performing maintenance on IKS nodes. Otherwise an intermediate state of a node could trigger the tool to queue up a reload.
    If the recovery tool is not present in an environment, nothing will be done.
    Currently only KUBEAPI checks are enabled/disabled.
    Args:
        enabled: Boolean value to define if the ibm-worker-recovery tool should be enabled.
    """
    if k8s.exists('configmap', 'kube-system', 'ibm-worker-recovery-checks'):
        config_map = k8s.get('configmap', 'kube-system', 'ibm-worker-recovery-checks')
        for check in config_map['data']:
            check_config = json.loads(config_map['data'][check])
            if check_config['Check'] == 'KUBEAPI':
                check_config['Enabled'] = enabled
                config_map['data'][check] = json.dumps(check_config, ensure_ascii=False)  # TODO: Remove ensure_ascii when migration to py3 is complete
        k8s.apply(config_map)
    else:
        logger.info('IBM Auto-Recovery tool is not present')
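A standalone sketch of the KUBEAPI toggle above, run against a hypothetical ibm-worker-recovery-checks data payload (the key names and extra fields are illustrative):

import json

config_map_data = {
    'checknode.json': '{"Check": "KUBEAPI", "Enabled": true, "FailureThreshold": 3}',
    'checkhttp.json': '{"Check": "HTTP", "Enabled": true, "Port": 80}'
}
for check in config_map_data:
    check_config = json.loads(config_map_data[check])
    if check_config['Check'] == 'KUBEAPI':
        check_config['Enabled'] = False
        config_map_data[check] = json.dumps(check_config, ensure_ascii=False)
print(config_map_data['checknode.json'])  # only the KUBEAPI check has been disabled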
Example #11
def run_release_tests(release_name):
    """
    Runs post-deployment Helm tests if the chart contains Helm tests.
    Nothing will be done if the release does not contain tests.
    Pre-existing test pods will be cleaned up prior to executing the tests.
    An exception is raised if the tests fail. Logs of the test containers are captured.
    Args:
        release_name: The name of the release to test
    """
    release_tests = helm.get_hooks(release_name,
                                   resource_types=['Pod'],
                                   hook_types=['test-success', 'test-failure'])
    if release_tests:
        for release_test in release_tests:
            test_kind = release_test['kind']
            test_namespace = release_test['metadata'].get(
                'namespace', 'default')
            test_name = release_test['metadata']['name']
            if k8s.exists(test_kind, test_namespace, test_name):
                k8s.delete(test_kind, test_namespace, test_name)
        try:
            helm.test(release_name)
        finally:
            # Grab the logs of any (failed) tests
            for release_test in release_tests:
                test_kind = release_test['kind']
                test_namespace = release_test['metadata'].get(
                    'namespace', 'default')
                test_name = release_test['metadata']['name']
                if k8s.exists(test_kind, test_namespace, test_name):
                    test_pod = k8s.get(test_kind,
                                       namespace=test_namespace,
                                       name=test_name)
                    for test_container in test_pod['spec']['containers']:
                        test_container_name = test_container['name']
                        logger.info(
                            'Logs for test pod "{pod}" container "{container}":'
                            .format(pod=test_name,
                                    container=test_container_name))
                        k8s.logs(test_name,
                                 namespace=test_namespace,
                                 container=test_container_name)
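helm.get_hooks is project-specific, but Helm v2 marks test hooks with the "helm.sh/hook" annotation; a standalone sketch of filtering test pods out of hypothetical hook manifests:

sample_hooks = [
    {'kind': 'Pod', 'metadata': {'name': 'app-test-connection',
                                 'annotations': {'helm.sh/hook': 'test-success'}}},
    {'kind': 'Job', 'metadata': {'name': 'app-db-init',
                                 'annotations': {'helm.sh/hook': 'post-install'}}}
]
release_tests = [hook for hook in sample_hooks
                 if hook['kind'] == 'Pod'
                 and hook['metadata'].get('annotations', {}).get('helm.sh/hook') in ('test-success', 'test-failure')]
print([test['metadata']['name'] for test in release_tests])  # ['app-test-connection']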
Example #12
def install_helm(helm_version):
    """
    Installs Helm and Tiller into the Kubernetes infrastructure.
    This assumes Tiller is to be installed in the kube-system namespace. It will upgrade Tiller if it is already present.
    It is safe to call this function multiple times. There are checks for understanding the current state of the Helm/Tiller deployment and only necessary updates are made.
    Args:
        helm_version: The version of Helm that should be installed, e.g. v2.11.1
    """
    # First check and ensure that the correct client version is present
    (rc, output) = baseutils.exe_cmd(
        '{helm} version --client'.format(helm=helm_binary),
        raise_exception=False,
        log_level=logging.NOTSET)
    if rc or helm_version not in output:
        tmp_dir = tempfile.mkdtemp()
        try:
            helm_tar = baseutils.shell_escape(
                os.path.join(tmp_dir, 'helm.tar.gz'))
            baseutils.exe_cmd('/usr/bin/curl -L {url} -o {helm_tar}'.format(
                url=baseutils.shell_escape(
                    'https://storage.googleapis.com/kubernetes-helm/helm-{version}-linux-amd64.tar.gz'
                    .format(version=helm_version)),
                helm_tar=helm_tar))
            baseutils.exe_cmd(
                '/bin/tar -xzvf {helm_tar} -C {tmp_dir} && rm -f {helm_tar}'.
                format(helm_tar=helm_tar,
                       tmp_dir=baseutils.shell_escape(tmp_dir)))
            os.rename(os.path.join(tmp_dir, 'linux-amd64', 'helm'),
                      helm_binary.strip('\''))
            os.chmod(helm_binary.strip('\''), 0o755)
        finally:
            shutil.rmtree(tmp_dir)
    # Secondly check that the correct version of Tiller is installed into the Kubernetes cluster
    (rc, output) = baseutils.exe_cmd('{helm} version'.format(helm=helm_binary),
                                     raise_exception=False,
                                     log_level=logging.NOTSET)
    if rc:
        # Tiller is not installed. We must check if the service account exists yet
        service_accounts = k8s.get('serviceaccount', namespace='kube-system')
        if 'tiller' not in [
                service_account['metadata']['name']
                for service_account in service_accounts
        ]:
            k8s.apply({
                'apiVersion': 'v1',
                'kind': 'ServiceAccount',
                'metadata': {
                    'name': 'tiller',
                    'namespace': 'kube-system'
                }
            })
        cluster_role_bindings = k8s.get('clusterrolebinding')
        if 'tiller' not in [
                cluster_role_binding['metadata']['name']
                for cluster_role_binding in cluster_role_bindings
        ]:
            k8s.apply({
                'apiVersion':
                'rbac.authorization.k8s.io/v1',
                'kind':
                'ClusterRoleBinding',
                'metadata': {
                    'name': 'tiller',
                },
                'roleRef': {
                    'apiGroup': 'rbac.authorization.k8s.io',
                    'kind': 'ClusterRole',
                    'name': 'cluster-admin'
                },
                'subjects': [{
                    'kind': 'ServiceAccount',
                    'name': 'tiller',
                    'namespace': 'kube-system'
                }]
            })
        baseutils.exe_cmd(
            '{helm} init --history-max 20 --service-account tiller --override "spec.template.spec.containers[0].command"="{{/tiller,--storage=secret}}"'
            .format(helm=helm_binary))
    elif output.count(helm_version) != 2:
        # Tiller is installed but it is an old version. Upgrade it
        baseutils.exe_cmd(
            '{helm} init --history-max 20 --service-account tiller --override "spec.template.spec.containers[0].command"="{{/tiller,--storage=secret}}" --upgrade'
            .format(helm=helm_binary))
    else:
        # Tiller is correctly configured. We still need to init the client to facilitate the usage of helm repositories
        baseutils.exe_cmd('{helm} init --client-only'.format(helm=helm_binary))
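A minimal sketch of the output.count(helm_version) != 2 check in install_helm above: "helm version" reports both the client and server (Tiller) versions, so the target version should appear exactly twice when both are up to date (the sample output is hypothetical):

helm_version = 'v2.11.1'
sample_output = ('Client: &version.Version{SemVer:"v2.11.1", GitCommit:"...", GitTreeState:"clean"}\n'
                 'Server: &version.Version{SemVer:"v2.9.1", GitCommit:"...", GitTreeState:"clean"}')
if sample_output.count(helm_version) != 2:
    print('Tiller is at a different version than the client; an upgrade is required')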