Ejemplo n.º 1
0
    def run(self, progress_callback):
        """Render debugging info (HTML) for one or all node pools of a GKE cluster.

        When 'nodePoolId' is absent or blank in the config, every node pool of
        the cluster is inspected. Returns an HTML string with each node pool's
        info plus its instance groups.
        """
        cluster_data, clusters, dss_cluster_settings, dss_cluster_config = get_cluster_from_dss_cluster(self.config['clusterId'])

        # retrieve the actual name in the cluster's data
        if cluster_data is None:
            raise Exception("No cluster data (not started?)")
        cluster_def = cluster_data.get("cluster", None)
        if cluster_def is None:
            raise Exception("No cluster definition (starting failed?)")
        cluster_name = cluster_def["name"]

        # get the object for the cluster, GKE side
        # (removed leftover debug print of clusters.zone)
        cluster = clusters.get_cluster(cluster_name)

        # no explicit node pool requested => inspect all of them
        node_pool_id = self.config.get('nodePoolId', None)
        if node_pool_id is None or len(node_pool_id) == 0:
            node_pool_ids = [node_pool.name for node_pool in cluster.get_node_pools()]
        else:
            node_pool_ids = [node_pool_id]

        sections = []
        for node_pool_id in node_pool_ids:
            node_pool = cluster.get_node_pool(node_pool_id)

            node_pool_info = node_pool.get_info()
            node_pool_info["instanceGroups"] = node_pool.get_instance_groups_info()

            sections.append('<h5>%s</h5><pre class="debug">%s</pre>' % (node_pool_id, json.dumps(node_pool_info, indent=2)))

        return '<div>%s</div>' % ''.join(sections)
Ejemplo n.º 2
0
    def run(self, progress_callback):
        """Run an arbitrary kubectl command against the cluster and render its output as HTML."""
        cluster_data, dss_cluster_settings, dss_cluster_config = get_cluster_from_dss_cluster(
            self.config['clusterId'])

        # the kubeconfig written at cluster startup gives access to the cluster
        kube_config_path = dss_cluster_settings.get_raw()['containerSettings'][
            'executionConfigsGenericOverrides']['kubeConfigPath']

        env = os.environ.copy()
        env['KUBECONFIG'] = kube_config_path

        # assemble the kubectl command line from the macro config
        cmd = ['kubectl'] + self.config.get('args', [])
        namespace = self.config.get("namespace", "")
        if not _is_none_or_blank(namespace):
            cmd += ["--namespace", namespace]
        output_format = self.config.get("format", "")
        if not _is_none_or_blank(output_format) and output_format != 'none':
            cmd += ["-o", output_format]

        logging.info("Run : %s" % json.dumps(cmd))
        try:
            out, err = run_with_timeout(cmd, env=env, timeout=20)
            rv = 0
        except KubeCommandException as e:
            rv, out, err = e.rv, e.out, e.err

        out_html = '<div class="alert alert-info"><div>Output</div><pre class="debug" style="max-width: 100%%; max-height: 100%%;">%s</pre></div>' % out
        err_html = '<div class="alert alert-danger"><div>Error</div><pre class="debug" style="max-width: 100%%; max-height: 100%%;">%s</pre></div>' % err
        # only show the error section when the command actually failed
        if rv == 0 or _is_none_or_blank(err):
            return out_html
        return ('<div class="alert alert-danger">Failed with code %s</div>'
                % rv) + err_html + out_html
Ejemplo n.º 3
0
    def run(self, progress_callback):
        """Create a new node pool on a GKE cluster and return its info as HTML.

        When no 'nodePoolId' is configured, the first free 'node-pool-N' name
        is generated. After creation, the NVIDIA driver installer daemonset is
        deployed (it only applies on tainted GPU nodes).
        """
        cluster_data, clusters, dss_cluster_settings, dss_cluster_config = get_cluster_from_dss_cluster(
            self.config['clusterId'])

        kube_config_path = dss_cluster_settings.get_raw()['containerSettings'][
            'executionConfigsGenericOverrides']['kubeConfigPath']

        # retrieve the actual name in the cluster's data
        if cluster_data is None:
            raise Exception("No cluster data (not started?)")
        cluster_def = cluster_data.get("cluster", None)
        if cluster_def is None:
            raise Exception("No cluster definition (starting failed?)")
        cluster_name = cluster_def["name"]

        # get the object for the cluster, GKE side
        cluster = clusters.get_cluster(cluster_name)

        node_pool_id = self.config.get('nodePoolId', None)
        node_pools = cluster.get_node_pools()
        if node_pool_id is None or len(node_pool_id) == 0:
            # no name given: pick the first 'node-pool-N' not already taken
            existing_names = [pool.name for pool in node_pools]
            idx = 0
            while ('node-pool-%s' % idx) in existing_names:
                idx += 1
            node_pool_id = 'node-pool-%s' % idx

        node_pool = cluster.get_node_pool(node_pool_id)

        # feed the macro's nodePoolConfig settings into the pool builder
        pool_conf = self.config.get("nodePoolConfig", {})
        builder = node_pool.get_node_pool_builder()
        builder.with_node_count(pool_conf.get('numNodes', 3))
        builder.use_gcr_io(pool_conf.get('useGcrIo', False))
        builder.with_oauth_scopes(pool_conf.get('oauthScopes', None))
        builder.with_machine_type(pool_conf.get('machineType', None))
        builder.with_disk_type(pool_conf.get('diskType', None))
        builder.with_disk_size_gb(pool_conf.get('diskSizeGb', None))
        builder.with_gpu(pool_conf.get('withGpu', False),
                         pool_conf.get('gpuType', None),
                         pool_conf.get('gpuCount', 1))
        builder.with_service_account(pool_conf.get('serviceAccountType', None),
                                     pool_conf.get('serviceAccount', None))
        builder.with_nodepool_labels(pool_conf.get('nodepoolLabels', {}))

        create_op = builder.build()
        logging.info("Waiting for cluster node pool creation")
        create_op.wait_done()
        logging.info("Cluster node pool created")

        # Launch NVIDIA driver installer daemonset (will only apply on tainted gpu nodes)
        create_installer_daemonset(kube_config_path=kube_config_path)

        return '<pre class="debug">%s</pre>' % json.dumps(node_pool.get_info(),
                                                          indent=2)
Ejemplo n.º 4
0
    def run(self, progress_callback):
        """Check network connectivity from a pod in the cluster back to the DSS backend.

        Spawns a busybox pod, resolves the DSS host name from inside it, then
        tries to open a TCP connection to the backend port. Returns an HTML
        report of each step; failures are caught and appended to the report
        rather than propagated.
        """
        cluster_data, clusters, dss_cluster_settings, dss_cluster_config = get_cluster_from_dss_cluster(self.config['clusterId'])

        # the cluster is accessible via the kubeconfig
        kube_config_path = dss_cluster_settings.get_raw()['containerSettings']['executionConfigsGenericOverrides']['kubeConfigPath']

        # retrieve the actual name in the cluster's data
        if cluster_data is None:
            raise Exception("No cluster data (not started?)")
        cluster_def = cluster_data.get("cluster", None)
        if cluster_def is None:
            raise Exception("No cluster definition (starting failed?)")
        cluster_name = cluster_def["name"]
        
        result = ''
        
        # the pod must reach DSS on its externally visible address, not localhost
        host = os.environ.get('DKU_BACKEND_EXT_HOST', socket.gethostname())
        port = os.environ['DKU_BACKEND_PORT']
        result = result + '<h5>Checking connectivity to %s:%s from pod in cluster</h5>' % (host, port)
        
        def add_to_result(result, op, cmd, out, err):
             # append one command's report (command line, stdout, stderr) to the HTML
             return result + '<h5>%s</h5><div style="margin-left: 20px;"><div>Command</div><pre class="debug">%s</pre><div>Output</div><pre class="debug">%s</pre><div>Error</div><pre class="debug">%s</pre></div>' % (op, json.dumps(cmd), out, err)

        try:
            # sanity check
            if host.startswith("127.0.0") or 'localhost' in host:
                raise Exception('Host appears to not be a public hostname. Set DKU_BACKEND_EXT_HOST')
            with BusyboxPod(kube_config_path) as b:
                # check that the pod resolved the hostname
                ip = None
                cmd = ['nslookup', host]
                out, err = b.exec_cmd(cmd)
                result = add_to_result(result, 'Resolve host', cmd, out, err)
                # keep the IP from the last 'Address ...' line of the nslookup output
                for line in out.split('\n'):
                    m = re.match('^Address.*\\s([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+[^\\s]*)\\s.*$', line)
                    if m is not None:
                        ip = m.group(1)
                if ip is None:
                    raise Exception('Hostname resolution of DSS node failed: %s' % out)
                    
                result = result + '<h5>Host %s resolved to %s</h5>' % (host, ip)

                # try to connect on the backend port
                cmd = ['nc', '-vz', ip, port]
                out, err = b.exec_cmd(cmd)
                result = add_to_result(result, 'Test connection to port', cmd, out, err)
                if 'no route to host' in err.lower():
                    raise Exception("DSS node resolved but unreachable on port %s : %s" % (port, err))

                result = result + '<h5>Connection successful</h5>'

        except KubeCommandException as e:
            # kubectl-level failure: show its captured stdout/stderr
            result = result + '<div class="alert alert-error"><div>%s</div><div>out:</div><pre>%s</pre><div>err:</div><pre>%s</pre></div>' % (str(e), e.out, e.err)
        except Exception as e:
            result = result + '<div class="alert alert-error">%s</div>' % str(e)
                
        return '<div>%s</div>' % result
            
Ejemplo n.º 5
0
 def run(self, progress_callback):
     """Dump the agent pool profiles of an AKS cluster as an HTML <pre> block."""
     cluster_data, clusters, dss_cluster_settings, dss_cluster_config = get_cluster_from_dss_cluster(self.config['clusterId'])
     # retrieve the actual name in the cluster's data
     if cluster_data is None:
         raise Exception("No cluster data (not started?)")
     cluster_def = cluster_data.get("cluster", None)
     if cluster_def is None:
         raise Exception("No cluster definition (starting failed?)")
     # the id is a full Azure resource id:
     # /subscriptions/<sub>/resourcegroups/<rg>/providers/<ns>/managedClusters/<name>
     _, _, subscription_id, _, resource_group, _, _, _, cluster_name = cluster_def["id"].split("/")
     aks_cluster = clusters.managed_clusters.get(resource_group, cluster_name)
     profiles = aks_cluster.as_dict()['agent_pool_profiles']
     return '<pre class="debug">%s</pre>' % json.dumps(profiles, indent=2)
Ejemplo n.º 6
0
    def run(self, progress_callback):
        """Resize an AKS node pool to the configured number of nodes."""
        cluster_data, clusters, dss_cluster_settings, dss_cluster_config = get_cluster_from_dss_cluster(
            self.config['clusterId'])

        # retrieve the actual name in the cluster's data
        if cluster_data is None:
            raise Exception("No cluster data (not started?)")
        cluster_def = cluster_data.get("cluster", None)
        if cluster_def is None:
            raise Exception("No cluster definition (starting failed?)")
        cluster_name = cluster_def["name"]

        # get the object for the cluster, AKS side
        resource_group_name = dss_cluster_config['config']['resourceGroup']
        cluster = clusters.managed_clusters.get(resource_group_name,
                                                cluster_name)

        # pick the requested pool; a blank pool id is only accepted when the
        # cluster has exactly one pool
        node_pool_id = self.config.get('nodePoolId', None)
        node_pool = None
        single_pool = len(cluster.agent_pool_profiles) == 1
        for profile in cluster.agent_pool_profiles:
            if profile.name == node_pool_id or (_is_none_or_blank(node_pool_id) and single_pool):
                node_pool = profile
        if node_pool is None:
            raise Exception("Unable to find node pool '%s'" % (node_pool_id))

        # see aks_scale() in azure-cli code
        cluster.service_principal_profile = None
        cluster.aad_profile = None

        desired_count = self.config['numNodes']
        logging.info("Resize to %s" % desired_count)
        if desired_count == 0:
            raise Exception("Can't delete node pool '%s'" % (node_pool_id))
        node_pool.count = desired_count
        logging.info("Waiting for cluster resize")

        def do_update():
            # push the modified cluster object back and wait for the operation
            update_op = clusters.managed_clusters.create_or_update(
                resource_group_name, cluster_name, cluster)
            return update_op.result()

        update_result = run_and_process_cloud_error(do_update)
        logging.info("Cluster updated")
        return '<pre class="debug">%s</pre>' % json.dumps(
            update_result.as_dict(), indent=2)
Ejemplo n.º 7
0
    def run(self, progress_callback):
        """Resize (or delete, when numNodes is 0) a GKE node pool.

        Returns an HTML <pre> dump of the node pool info after a resize, or of
        the remaining node pool names after a deletion.
        """
        cluster_data, clusters, dss_cluster_settings, dss_cluster_config = get_cluster_from_dss_cluster(
            self.config['clusterId'])

        # retrieve the actual name in the cluster's data
        if cluster_data is None:
            raise Exception("No cluster data (not started?)")
        cluster_def = cluster_data.get("cluster", None)
        if cluster_def is None:
            raise Exception("No cluster definition (starting failed?)")
        cluster_name = cluster_def["name"]

        # get the object for the cluster, GKE side
        cluster = clusters.get_cluster(cluster_name)

        node_pool_id = self.config.get('nodePoolId', None)
        node_pools = cluster.get_node_pools()
        if node_pool_id is None or len(node_pool_id) == 0:
            # no pool specified: only unambiguous when there is exactly one
            node_pool_ids = [node_pool.name for node_pool in node_pools]
            if len(node_pool_ids) != 1:
                raise Exception(
                    "Cluster has %s node pools, cannot resize. Specify a node pool explicitely among %s"
                    % (len(node_pool_ids), json.dumps(node_pool_ids)))
            node_pool_id = node_pool_ids[0]

        node_pool = cluster.get_node_pool(node_pool_id)

        desired_count = self.config['numNodes']
        logging.info("Resize to %s" % desired_count)
        if desired_count == 0:
            # a target of 0 nodes means dropping the pool entirely
            delete_op = node_pool.delete()
            logging.info("Waiting for cluster node pool delete")
            delete_op.wait_done()
            logging.info("Cluster node pool deleted")
            node_pool_ids = [
                node_pool.name for node_pool in cluster.get_node_pools()
            ]
            return '<pre class="debug">%s</pre>' % json.dumps(node_pool_ids,
                                                              indent=2)
        else:
            # use desired_count (read once above) instead of re-reading the config
            resize_op = node_pool.resize(desired_count)
            logging.info("Waiting for cluster resize")
            resize_op.wait_done()
            logging.info("Cluster resized")
            return '<pre class="debug">%s</pre>' % json.dumps(
                node_pool.get_info(), indent=2)
Ejemplo n.º 8
0
    def run(self, progress_callback):
        """Dump the agent pool profiles of an AKS cluster as an HTML <pre> block."""
        cluster_data, clusters, dss_cluster_settings, dss_cluster_config = get_cluster_from_dss_cluster(
            self.config['clusterId'])

        # retrieve the actual name in the cluster's data
        if cluster_data is None:
            raise Exception("No cluster data (not started?)")
        cluster_def = cluster_data.get("cluster", None)
        if cluster_def is None:
            raise Exception("No cluster definition (starting failed?)")
        cluster_name = cluster_def["name"]

        # fetch the cluster object on the AKS side
        resource_group_name = dss_cluster_config['config']['resourceGroup']
        cluster = clusters.managed_clusters.get(resource_group_name, cluster_name)

        profiles = cluster.as_dict()['agent_pool_profiles']
        return '<pre class="debug">%s</pre>' % json.dumps(profiles, indent=2)
Ejemplo n.º 9
0
    def run(self, progress_callback):
        """Ensure an autoscaler pod runs on the cluster, creating one if needed.

        Returns a short HTML status message.
        """
        cluster_data, dss_cluster_settings, dss_cluster_config = get_cluster_from_dss_cluster(
            self.config['clusterId'])

        # retrieve the actual name in the cluster's data
        if cluster_data is None:
            raise Exception("No cluster data (not started?)")
        cluster_def = cluster_data.get("cluster", None)
        if cluster_def is None:
            raise Exception("No cluster definition (starting failed?)")
        cluster_id = cluster_def["Name"]

        # the cluster is accessible via the kubeconfig
        kube_config_path = dss_cluster_settings.get_raw()['containerSettings'][
            'executionConfigsGenericOverrides']['kubeConfigPath']

        # fixed: both messages used '<h5>' as the closing tag instead of '</h5>'
        if has_autoscaler(kube_config_path):
            return '<h5>An autoscaler pod already runs</h5>'
        else:
            add_autoscaler_if_needed(cluster_id, kube_config_path)
            return '<h5>Created an autoscaler pod</h5>'
Ejemplo n.º 10
0
    def run(self, progress_callback):
        """Install the AWS ALB ingress controller on an EKS cluster.

        Steps: associate the IAM OIDC provider, find or create the IAM policy,
        create the RBAC role, bind a service account to the policy, deploy the
        controller manifest, optionally tag subnets for ELB use, then record
        the install in the cluster settings. Returns HTML built from all
        collected command outputs.
        """
        cluster_data, dss_cluster_settings, dss_cluster_config = get_cluster_from_dss_cluster(
            self.config['clusterId'])

        if get_cluster_generic_property(dss_cluster_settings,
                                        'alb-ingress.controller',
                                        'false') == 'true':
            raise Exception("ALB controller already installed, remove first")

        # retrieve the actual name in the cluster's data
        if cluster_data is None:
            raise Exception("No cluster data (not started?)")
        cluster_def = cluster_data.get("cluster", None)
        if cluster_def is None:
            raise Exception("No cluster definition (starting failed?)")
        cluster_id = cluster_def["Name"]
        kube_config_path = dss_cluster_settings.get_raw()['containerSettings'][
            'executionConfigsGenericOverrides']['kubeConfigPath']
        connection_info = dss_cluster_config.get('config',
                                                 {}).get('connectionInfo', {})

        def region_args():
            # region from the connection info, falling back to the
            # AWS_DEFAULT_REGION environment variable.
            # Fixed: the original tested "'AWS_DEFAULT_REGION' is os.environ"
            # (identity comparison, always False) so the fallback never applied.
            if _has_not_blank_property(connection_info, 'region'):
                return ['--region', connection_info['region']]
            elif 'AWS_DEFAULT_REGION' in os.environ:
                return ['--region', os.environ['AWS_DEFAULT_REGION']]
            return []

        env = os.environ.copy()
        env['KUBECONFIG'] = kube_config_path

        command_outputs = []
        keep_going = True

        # setup iam stuff in eksctl
        args = ['utils', 'associate-iam-oidc-provider', '--approve']
        args = args + ['--cluster', cluster_id]
        args = args + region_args()

        c = EksctlCommand(args, connection_info)
        command_outputs.append(c.run())
        if command_outputs[-1][1] != 0:
            return make_html(command_outputs)

        # checking if we need to create the policy
        policy_name = self.config.get('policyName',
                                      'ALBIngressControllerIAMPolicy')

        args = ['iam', 'list-policies'] + region_args()

        c = AwsCommand(args, connection_info)
        command_outputs.append(c.run())
        if command_outputs[-1][1] != 0:
            return make_html(command_outputs)

        policy_arn = None
        for policy in json.loads(command_outputs[-1][2])['Policies']:
            if policy.get('PolicyName', None) == policy_name:
                policy_arn = policy.get('Arn', None)

        if policy_arn is None:
            if not self.config.get("createPolicy", False):
                raise Exception(
                    "Policy %s doesn't exist and the macro isn't allowed to create it"
                    % policy_name)
            # create the policy from the reference document published upstream
            policy_document_url = 'https://raw.githubusercontent.com/kubernetes-sigs/aws-alb-ingress-controller/v1.1.8/docs/examples/iam-policy.json'
            policy_document = requests.get(policy_document_url).text
            with open("policy.json", "w") as p:
                p.write(policy_document)

            args = ['iam', 'create-policy']
            args = args + ['--policy-name', policy_name]
            args = args + ['--policy-document', 'file://policy.json']
            args = args + region_args()

            c = AwsCommand(args, connection_info)
            command_outputs.append(c.run())
            if command_outputs[-1][1] != 0:
                return make_html(command_outputs)

            policy_arn = json.loads(command_outputs[-1][2])['Policy'].get(
                'Arn', None)

        # create the role on the cluster
        cmd = [
            'kubectl', 'apply', '-f',
            'https://raw.githubusercontent.com/kubernetes-sigs/aws-alb-ingress-controller/v1.1.4/docs/examples/rbac-role.yaml'
        ]
        logging.info("Run : %s" % json.dumps(cmd))
        try:
            out, err = run_with_timeout(cmd, env=env, timeout=100)
            command_outputs.append((cmd, 0, out, err))
        except KubeCommandException as e:
            command_outputs.append((cmd, e.rv, e.out, e.err))
            keep_going = False

        if not keep_going:
            return make_html(command_outputs)

        # attach the role to the policy
        args = [
            'create', 'iamserviceaccount',
            '--override-existing-serviceaccounts', '--approve'
        ]
        args = args + ['--name', 'alb-ingress-controller'
                       ]  # that's the name in the rbac-role.yaml
        args = args + ['--namespace', 'kube-system'
                       ]  # that's the name in the rbac-role.yaml
        args = args + ['--cluster', cluster_id]
        args = args + ['--attach-policy-arn', policy_arn]
        args = args + region_args()

        c = EksctlCommand(args, connection_info)
        command_outputs.append(c.run())
        if command_outputs[-1][1] != 0:
            return make_html(command_outputs)

        r = requests.get(
            'https://raw.githubusercontent.com/kubernetes-sigs/aws-alb-ingress-controller/v1.1.4/docs/examples/alb-ingress-controller.yaml'
        )
        # fixed: r.content is bytes on Python 3 and breaks re.sub (str pattern)
        # and the text-mode write below; use the decoded text instead
        service_data = r.text
        # point the controller manifest at this cluster
        cluster_flag_pattern = '#.*cluster\\-name=.*'
        cluster_flag_replacement = '- --cluster-name=%s' % cluster_id
        service_data = re.sub(cluster_flag_pattern, cluster_flag_replacement,
                              service_data)

        logging.info("Applying controller manifest:\n%s" % service_data)
        with open('./alb-ingress-controller.yaml', 'w') as f:
            f.write(service_data)

        cmd = ['kubectl', 'apply', '-f', './alb-ingress-controller.yaml']
        logging.info("Run : %s" % json.dumps(cmd))
        try:
            out, err = run_with_timeout(cmd, env=env, timeout=100)
            command_outputs.append((cmd, 0, out, err))
        except KubeCommandException as e:
            command_outputs.append((cmd, e.rv, e.out, e.err))
            keep_going = False

        if not keep_going:
            return make_html(command_outputs)

        if self.config.get("tagSubnets", False):
            networking_settings = dss_cluster_config.get('config', {}).get(
                'networkingSettings', {})
            subnets = networking_settings.get('subnets', [])
            if networking_settings.get('privateNetworking', False):
                private_subnets = networking_settings.get('privateSubnets', [])
            else:
                private_subnets = []

            def add_tags(resources, tag):
                # best-effort tagging: failures are recorded in command_outputs
                # but do not abort the install (as in the original flow)
                args = ['ec2', 'create-tags'] + region_args()
                args = args + ["--resources"] + resources
                args = args + ["--tags", tag]

                c = AwsCommand(args, connection_info)
                command_outputs.append(c.run())

            if len(subnets) > 0:
                add_tags(subnets, 'Key=kubernetes.io/role/elb,Value=1')
            if len(private_subnets) > 0:
                add_tags(private_subnets,
                         'Key=kubernetes.io/role/internal-elb,Value=1')

        set_cluster_generic_property(dss_cluster_settings,
                                     'alb-ingress.controller', 'true', True)

        return make_html(command_outputs)
Ejemplo n.º 11
0
    def run(self, progress_callback):
        """Resize (or delete, when numNodes is 0) an EKS node group via eksctl.

        When no 'nodeGroupId' is configured, the cluster must have exactly one
        node group. Returns a short HTML status message.
        """
        cluster_data, dss_cluster_settings, dss_cluster_config = get_cluster_from_dss_cluster(
            self.config['clusterId'])

        # retrieve the actual name in the cluster's data
        if cluster_data is None:
            raise Exception("No cluster data (not started?)")
        cluster_def = cluster_data.get("cluster", None)
        if cluster_def is None:
            raise Exception("No cluster definition (starting failed?)")
        cluster_id = cluster_def["Name"]

        connection_info = dss_cluster_config.get('config',
                                                 {}).get('connectionInfo', {})

        def region_args():
            # region from the connection info, falling back to the
            # AWS_DEFAULT_REGION environment variable.
            # Fixed: the original tested "'AWS_DEFAULT_REGION' is os.environ"
            # (identity comparison, always False) so the fallback never applied.
            if _has_not_blank_property(connection_info, 'region'):
                return ['--region', connection_info['region']]
            elif 'AWS_DEFAULT_REGION' in os.environ:
                return ['--region', os.environ['AWS_DEFAULT_REGION']]
            return []

        node_group_id = self.config.get('nodeGroupId', None)
        if node_group_id is None or len(node_group_id) == 0:
            # no node group given: only unambiguous when there is exactly one
            args = ['get', 'nodegroup']
            args = args + ['--cluster', cluster_id]
            args = args + region_args()
            args = args + ['-o', 'json']

            c = EksctlCommand(args, connection_info)
            node_groups = json.loads(c.run_and_get_output())
            node_group_ids = [node_group['Name'] for node_group in node_groups]
            if len(node_group_ids) != 1:
                raise Exception(
                    "Cluster has %s node groups, cannot resize. Specify a node group explicitely among %s"
                    % (len(node_group_ids), json.dumps(node_group_ids)))
            node_group_id = node_group_ids[0]

        # verify that the requested node group actually exists
        args = ['get', 'nodegroup']
        args = args + ['--cluster', cluster_id]
        args = args + ['--name', node_group_id]
        args = args + region_args()
        args = args + ['-o', 'json']

        c = EksctlCommand(args, connection_info)
        node_group_batch = json.loads(c.run_and_get_output())
        if len(node_group_batch) == 0:
            raise Exception("Unable to retrieve info of node group %s" %
                            node_group_id)

        desired_count = self.config['numNodes']
        logging.info("Resize to %s" % desired_count)
        if desired_count == 0:
            # a target of 0 nodes means dropping the node group entirely
            args = ['delete', 'nodegroup']
            args = args + ['-v', '4']
            args = args + ['--cluster', cluster_id]
            args = args + ['--name', node_group_id]
            args = args + region_args()

            c = EksctlCommand(args, connection_info)
            rv, out, err = c.run_and_get()
            if rv == 0:
                logging.info("Cluster node group deleted")
                return '<div>Deleted</div><pre class="debug">%s</pre>' % node_group_id
            else:
                logging.info("Cluster node group failed to delete")
                return '<div>Failed to delete the node group</div><pre class="debug">%s</pre>' % (
                    err)
        else:
            args = ['scale', 'nodegroup']
            args = args + ['-v', '4']
            args = args + ['--cluster', cluster_id]
            args = args + ['--name', node_group_id]
            args = args + ['--nodes', str(desired_count)]
            # optionally adjust the autoscaling bounds as well
            desired_min_count = self.config.get('minNumNodes', -1)
            desired_max_count = self.config.get('maxNumNodes', -1)
            if desired_min_count > 0:
                args = args + ['--nodes-min', str(desired_min_count)]
            if desired_max_count > 0:
                args = args + ['--nodes-max', str(desired_max_count)]
            args = args + region_args()

            c = EksctlCommand(args, connection_info)
            rv, out, err = c.run_and_get()
            if rv == 0:
                logging.info("Cluster node group resized")
                return '<div>Resized</div><pre class="debug">%s</pre>' % node_group_id
            else:
                logging.info("Cluster node group failed to resize")
                return '<div>Failed to resize the node group</div><pre class="debug">%s</pre>' % (
                    err)
Ejemplo n.º 12
0
    def run(self, progress_callback):
        """Uninstall the AWS ALB ingress controller previously installed by the macro.

        Deletes the controller deployment, the IAM service account binding and
        the RBAC role, then clears the install marker in the cluster settings.
        Returns HTML built from all collected command outputs.
        """
        cluster_data, dss_cluster_settings, dss_cluster_config = get_cluster_from_dss_cluster(self.config['clusterId'])

        if get_cluster_generic_property(dss_cluster_settings, 'alb-ingress.controller', 'false') != 'true':
            raise Exception("ALB controller not installed (or not by the installation macro)")

        # retrieve the actual name in the cluster's data
        if cluster_data is None:
            raise Exception("No cluster data (not started?)")
        cluster_def = cluster_data.get("cluster", None)
        if cluster_def is None:
            raise Exception("No cluster definition (starting failed?)")
        cluster_id = cluster_def["Name"]
        kube_config_path = dss_cluster_settings.get_raw()['containerSettings']['executionConfigsGenericOverrides']['kubeConfigPath']
        connection_info = dss_cluster_config.get('config', {}).get('connectionInfo', {})

        env = os.environ.copy()
        env['KUBECONFIG'] = kube_config_path

        command_outputs = []

        # delete the controller
        cmd = ['kubectl', 'delete', '-f', 'https://raw.githubusercontent.com/kubernetes-sigs/aws-alb-ingress-controller/v1.1.4/docs/examples/alb-ingress-controller.yaml']
        logging.info("Run : %s" % json.dumps(cmd))
        try:
            out, err = run_with_timeout(cmd, env=env, timeout=100)
            command_outputs.append((cmd, 0, out, err))
        except KubeCommandException as e:
            command_outputs.append((cmd, e.rv, e.out, e.err))
            # the controller could not be removed: stop here
            return make_html(command_outputs)

        # detach the role from the policy
        args = ['delete', 'iamserviceaccount']
        args = args + ['--name', 'alb-ingress-controller'] # that's the name in the rbac-role.yaml
        args = args + ['--namespace', 'kube-system'] # that's the name in the rbac-role.yaml
        args = args + ['--cluster', cluster_id]

        if _has_not_blank_property(connection_info, 'region'):
            args = args + ['--region', connection_info['region']]
        elif 'AWS_DEFAULT_REGION' in os.environ:
            # fixed: was "'AWS_DEFAULT_REGION' is os.environ" (identity
            # comparison, always False), so this fallback never applied
            args = args + ['--region', os.environ['AWS_DEFAULT_REGION']]

        c = EksctlCommand(args, connection_info)
        command_outputs.append(c.run())
        if command_outputs[-1][1] != 0:
            return make_html(command_outputs)

        # delete the role on the cluster (best-effort: result is reported, not fatal)
        cmd = ['kubectl', 'delete', '-f', 'https://raw.githubusercontent.com/kubernetes-sigs/aws-alb-ingress-controller/v1.1.4/docs/examples/rbac-role.yaml']
        logging.info("Run : %s" % json.dumps(cmd))
        try:
            out, err = run_with_timeout(cmd, env=env, timeout=100)
            command_outputs.append((cmd, 0, out, err))
        except KubeCommandException as e:
            command_outputs.append((cmd, e.rv, e.out, e.err))

        set_cluster_generic_property(dss_cluster_settings, 'alb-ingress.controller', 'false', True)

        return make_html(command_outputs)
Ejemplo n.º 13
0
    def run(self, progress_callback):
        """Build an HTML report of the cluster's EKS node groups.

        Lists either the node group given by config['nodeGroupId'] or, when
        blank, every node group of the cluster. For each one, the eksctl
        nodegroup description, its CloudFormation stack resources and the
        backing auto-scaling group are fetched and rendered as HTML debug
        sections.

        :param progress_callback: unused here (kept for the macro interface)
        :returns: HTML string with one section per node group
        :raises Exception: if the cluster has no data or no definition
        """
        cluster_data, dss_cluster_settings, dss_cluster_config = get_cluster_from_dss_cluster(
            self.config['clusterId'])

        # retrieve the actual name in the cluster's data
        if cluster_data is None:
            raise Exception("No cluster data (not started?)")
        cluster_def = cluster_data.get("cluster", None)
        if cluster_def is None:
            raise Exception("No cluster definition (starting failed?)")
        cluster_id = cluster_def["Name"]

        connection_info = dss_cluster_config.get('config',
                                                 {}).get('connectionInfo', {})

        def _with_region(args):
            """Return args extended with --region from connection info or env."""
            if _has_not_blank_property(connection_info, 'region'):
                return args + ['--region', connection_info['region']]
            # BUGFIX: was `'AWS_DEFAULT_REGION' is os.environ` — an identity
            # comparison that is always False; a membership test was intended,
            # so the environment-variable fallback never applied.
            if 'AWS_DEFAULT_REGION' in os.environ:
                return args + ['--region', os.environ['AWS_DEFAULT_REGION']]
            return args

        node_group_id = self.config.get('nodeGroupId', None)
        if node_group_id is None or len(node_group_id) == 0:
            # no node group specified: list them all via eksctl
            args = _with_region(['get', 'nodegroup', '--cluster', cluster_id])
            args = args + ['-o', 'json']

            c = EksctlCommand(args, connection_info)
            node_groups = json.loads(c.run_and_get_output())
            node_group_ids = [node_group['Name'] for node_group in node_groups]
        else:
            node_group_ids = [node_group_id]

        node_groups = []
        for node_group_id in node_group_ids:
            args = _with_region(['get', 'nodegroup', '--cluster', cluster_id,
                                 '--name', node_group_id])
            args = args + ['-o', 'json']

            c = EksctlCommand(args, connection_info)
            node_group_batch = json.loads(c.run_and_get_output())
            if len(node_group_batch) == 0:
                node_groups.append(
                    '<h5>%s</h5><div class="alert alert-error">Unable to get details</div>'
                    % (node_group_id))
                continue

            node_group = node_group_batch[0]

            # the node group's instances belong to an auto-scaling group
            # created by its CloudFormation stack; locate it via the stack
            node_group_stack_name = node_group['StackName']

            args = ['cloudformation', 'describe-stack-resources']
            args = args + ['--stack-name', node_group_stack_name]

            c = AwsCommand(args, connection_info)
            node_group_resources = json.loads(c.run_and_get_output()).get(
                'StackResources', [])

            # find the auto-scaling-group resource among the stack resources
            auto_scaling_resource = None
            for r in node_group_resources:
                if r.get('ResourceType',
                         '') == 'AWS::AutoScaling::AutoScalingGroup':
                    auto_scaling_resource = r

            if auto_scaling_resource is None:
                node_groups.append(
                    '<h5>%s</h5><div class="alert alert-error">Unable to get auto-scaling group</div><pre class="debug">%s</pre>'
                    % (node_group_id, json.dumps(node_group, indent=2)))
                continue

            node_group_auto_scaling_id = auto_scaling_resource[
                'PhysicalResourceId']

            args = ['autoscaling', 'describe-auto-scaling-groups']
            args = args + [
                '--auto-scaling-group-names', node_group_auto_scaling_id
            ]

            c = AwsCommand(args, connection_info)
            auto_scaling_resources = json.loads(c.run_and_get_output()).get(
                'AutoScalingGroups', [])

            if len(auto_scaling_resources) == 0:
                node_groups.append(
                    '<h5>%s</h5><div class="alert alert-error">Unable to get auto-scaling group\'s resources</div><pre class="debug">%s</pre>'
                    % (node_group_id, json.dumps(node_group, indent=2)))
                continue

            auto_scaling_resource = auto_scaling_resources[0]

            # summarize the scaling bounds and the current instance count
            min_instances = auto_scaling_resource.get('MinSize', '')
            cur_instances = len(auto_scaling_resource.get('Instances', []))
            max_instances = auto_scaling_resource.get('MaxSize', '')
            node_groups.append(
                '<h5>%s</h5><pre class="debug">%s</pre><div>Min=%s, current=%s, max=%s</div><pre class="debug">%s</pre>'
                % (node_group_id, json.dumps(node_group, indent=2),
                   min_instances, cur_instances, max_instances,
                   json.dumps(auto_scaling_resource.get('Instances', []),
                              indent=2)))

        return '<div>%s</div>' % ''.join(node_groups)
Ejemplo n.º 14
0
    def run(self, progress_callback):
        """Create a new node group on the EKS cluster and report on it.

        Builds an `eksctl create nodegroup` command from the macro config
        (machine type, disk, node counts, autoscaling, security groups),
        runs it, ensures the cluster autoscaler when autoscaling is enabled,
        then returns an HTML dump of the cluster's node groups.

        :param progress_callback: unused here (kept for the macro interface)
        :returns: HTML string
        :raises Exception: if the cluster has no data/definition or if the
            `eksctl create nodegroup` command fails
        """
        cluster_data, dss_cluster_settings, dss_cluster_config = get_cluster_from_dss_cluster(
            self.config['clusterId'])

        # retrieve the actual name in the cluster's data
        if cluster_data is None:
            raise Exception("No cluster data (not started?)")
        cluster_def = cluster_data.get("cluster", None)
        if cluster_def is None:
            raise Exception("No cluster definition (starting failed?)")
        cluster_id = cluster_def["Name"]

        # the cluster is accessible via the kubeconfig
        kube_config_path = dss_cluster_settings.get_raw()['containerSettings'][
            'executionConfigsGenericOverrides']['kubeConfigPath']

        connection_info = dss_cluster_config.get('config',
                                                 {}).get('connectionInfo', {})

        node_group_id = self.config.get('nodeGroupId', None)

        args = ['create', 'nodegroup']
        args = args + ['-v', '4']
        args = args + ['--cluster', cluster_id]
        if node_group_id is not None and len(node_group_id) > 0:
            args = args + ['--name', node_group_id]

        if _has_not_blank_property(connection_info, 'region'):
            args = args + ['--region', connection_info['region']]
        # BUGFIX: was `'AWS_DEFAULT_REGION' is os.environ` — identity check,
        # always False, so the environment region was never picked up
        elif 'AWS_DEFAULT_REGION' in os.environ:
            args = args + ['--region', os.environ['AWS_DEFAULT_REGION']]

        if dss_cluster_config['config'].get('useEcr', False):
            args = args + ['--full-ecr-access']

        # BUGFIX: privateNetworking lives under 'config' like the sibling
        # options (useEcr, securityGroups); it was read from the top-level
        # dict and thus always defaulted to False
        if dss_cluster_config['config'].get('privateNetworking', False):
            args = args + ['--node-private-networking']

        security_groups = dss_cluster_config['config'].get(
            'securityGroups', [])
        if len(security_groups) > 0:
            args = args + ['--node-security-groups', ','.join(security_groups)]

        node_pool = self.config.get('nodePool', {})
        if 'machineType' in node_pool:
            args = args + ['--node-type', node_pool['machineType']]
        if 'diskType' in node_pool:
            args = args + ['--node-volume-type', node_pool['diskType']]
        if 'diskSizeGb' in node_pool and node_pool['diskSizeGb'] > 0:
            args = args + ['--node-volume-size', str(node_pool['diskSizeGb'])]

        args = args + ['--nodes', str(node_pool.get('numNodes', 3))]
        if node_pool.get('numNodesAutoscaling', False):
            # --asg-access grants the nodegroup IAM access to the ASG API
            args = args + ['--asg-access']
            args = args + ['--nodes-min', str(node_pool.get('minNumNodes', 2))]
            args = args + ['--nodes-max', str(node_pool.get('maxNumNodes', 5))]

        c = EksctlCommand(args, connection_info)
        if c.run_and_log() != 0:
            raise Exception("Failed to add nodegroup")

        if node_pool.get('numNodesAutoscaling', False):
            logging.info("Nodegroup is autoscaling, ensuring autoscaler")
            add_autoscaler_if_needed(cluster_id, kube_config_path)

        # fetch the (updated) list of node groups for the report
        args = ['get', 'nodegroup']
        args = args + ['--cluster', cluster_id]

        if _has_not_blank_property(connection_info, 'region'):
            args = args + ['--region', connection_info['region']]
        # BUGFIX: `in`, not `is` (same identity-check bug as above)
        elif 'AWS_DEFAULT_REGION' in os.environ:
            args = args + ['--region', os.environ['AWS_DEFAULT_REGION']]

        args = args + ['-o', 'json']

        c = EksctlCommand(args, connection_info)
        node_groups_str = c.run_and_get_output()

        # BUGFIX: the closing tag was '<h5>' instead of '</h5>'
        return '<h5>Nodegroup added</h5><pre class="debug">%s</pre>' % node_groups_str