def add_to_cluster(cls, cluster: Cluster) -> None:
    """Deploy the Kubernetes metrics server into the EKS cluster.

    :param cluster: target EKS cluster construct
    :return: None
    """
    manifest = ManifestGenerator.namespace_resource('metrics-server')
    ns_name = manifest.get('metadata', {}).get('name')
    namespace = cluster.add_resource(
        f"{manifest.get('kind')}-{ns_name}", manifest)

    chart_values = {
        "extraArgs": {
            # Have the metrics server reach kubelets via their internal IP
            "kubelet-preferred-address-types": "InternalIP",
        },
        "apiService": {
            "create": True,
        },
    }
    chart = cluster.add_chart(
        'helm-chart-metrics-server',
        release="metrics-server",
        chart="metrics-server",
        namespace="metrics-server",
        repository=cls.HELM_REPOSITORY,
        version="4.2.1",
        values=chart_values,
    )
    # The namespace manifest must be applied before the Helm release
    chart.node.add_dependency(namespace)
def add_to_cluster(cls, cluster: Cluster) -> None:
    """
    Deploys into the EKS cluster the prometheus operator and the service
    accounts for its components (fixes a docstring copy-pasted from the
    external-secrets deployer).

    :param cluster: target EKS cluster
    :return: None
    """
    namespace = "prometheus"
    resource = ManifestGenerator.namespace_resource(namespace)
    ns = cluster.add_resource(
        f"{resource.get('kind')}-{resource.get('metadata', {}).get('name')}",
        resource)

    # One service account per component, so IAM permissions can be scoped
    # individually; each must wait for the namespace to exist.
    operator_sa = cluster.add_service_account(
        'prometheus-operator',
        name='prometheus-operator',
        namespace=resource.get('metadata', {}).get('name'),
    )
    operator_sa.node.add_dependency(ns)

    prometheus_sa = cluster.add_service_account(
        'prometheus',
        name='prometheus',
        namespace=resource.get('metadata', {}).get('name'),
    )
    prometheus_sa.node.add_dependency(ns)

    alertmanager_sa = cluster.add_service_account(
        'alertmanager',
        name='alertmanager',
        namespace=resource.get('metadata', {}).get('name'),
    )
    alertmanager_sa.node.add_dependency(ns)

    cls._create_chart_release(cluster, operator_sa, prometheus_sa,
                              alertmanager_sa)
def add_to_cluster(cls, cluster: Cluster, kubernetes_version: str) -> None:
    """Deploy the Kubernetes cluster-autoscaler into the EKS cluster.

    :param cluster: target EKS cluster
    :param kubernetes_version: cluster version, used to select a matching
        autoscaler image tag
    :return: None
    """
    manifest = ManifestGenerator.namespace_resource('cluster-autoscaler')
    ns_name = manifest.get('metadata', {}).get('name')
    namespace = cluster.add_resource(
        f"{manifest.get('kind')}-{ns_name}", manifest)

    sa = cluster.add_service_account(
        'ClusterAutoscalerServiceAccount',
        name='cluster-autoscaler',
        namespace=ns_name,
    )
    sa.node.add_dependency(namespace)
    # Grant the autoscaler the IAM permissions it needs on the node groups
    cls.attach_iam_policies_to_role(sa.role)

    chart_values = {
        "autoDiscovery": {
            "clusterName": cluster.cluster_name,
        },
        "cloudProvider": "aws",
        "awsRegion": cluster.vpc.stack.region,
        "image": {
            "repository": "eu.gcr.io/k8s-artifacts-prod/autoscaling/cluster-autoscaler",
            "tag": cls._get_cluster_autoscaler_version(kubernetes_version),
            "pullPolicy": "Always",
        },
        "extraArgs": {
            "balance-similar-node-groups": "true"
        },
        "rbac": {
            "create": True,
            "serviceAccount": {
                # Reuse the SA created above instead of letting Helm make one
                "name": sa.service_account_name,
                "create": False,
            },
            "pspEnabled": True,
        },
    }
    chart = cluster.add_chart(
        "helm-chart-cluster-autoscaler",
        release="cluster-autoscaler",
        chart="cluster-autoscaler",
        namespace=sa.service_account_namespace,
        repository=cls.HELM_REPOSITORY,
        version="7.3.3",
        values=chart_values,
    )
    chart.node.add_dependency(sa)
def __init__(self, scope: BaseApp, id: str, vpc: Vpc, env_fqdn: str, **kwargs) -> None:
    """Provision the EKS cluster and its baseline in-cluster applications.

    :param scope: application scope carrying the environment configuration
    :param id: construct id
    :param vpc: VPC to place the cluster in
    :param env_fqdn: environment domain, forwarded to the Grafana deployer
    """
    super().__init__(scope, id, **kwargs)

    eks_config = scope.environment_config.get('eks', {})
    kubernetes_version = eks_config.get('kubernetesVersion')
    cluster_name = scope.prefixed_str(eks_config.get('clusterName'))

    # Role mapped to the cluster masters; assumable from within the account
    cluster_admin_role = Role(
        self,
        scope.prefixed_str('EKS-AdminRole'),
        role_name=scope.prefixed_str('EKS-AdminRole'),
        assumed_by=AccountRootPrincipal(),
    )

    self.__cluster = eks_cluster = Cluster(
        self,
        cluster_name,
        cluster_name=cluster_name,
        vpc=vpc,
        version=KubernetesVersion.of(kubernetes_version),
        default_capacity=0,  # We define later the capacity
        masters_role=cluster_admin_role,
        vpc_subnets=self._get_control_plane_subnets(scope),  # Control plane subnets
    )

    for profile in eks_config.get('fargateProfiles', []):
        selector = Selector(namespace=profile.get('namespace'),
                            labels=profile.get('labels'))
        eks_cluster.add_fargate_profile(profile.get('name'),
                                        selectors=[selector])

    asg_fleets = []
    for fleet in eks_config.get('workerNodesFleets'):
        if fleet.get('type') == 'managed':
            self.add_managed_fleet(eks_cluster, fleet)
        if fleet.get('type') == 'ASG':
            asg_fleets += self.add_asg_fleet(scope, eks_cluster, fleet)
    self._enable_cross_fleet_communication(asg_fleets)

    # Base cluster applications
    MetricsServer.add_to_cluster(eks_cluster)
    ClusterAutoscaler.add_to_cluster(eks_cluster, kubernetes_version)
    ExternalSecrets.add_to_cluster(eks_cluster)
    CertManager.add_to_cluster(eks_cluster)

    # Monitoring applications
    PrometheusOperator.add_to_cluster(eks_cluster)
    Grafana.add_to_cluster(eks_cluster, env_fqdn)

    # Logging & tracing applications
    Fluentd.add_to_cluster(eks_cluster)
    Loki.add_to_cluster(eks_cluster)
def add_managed_fleet(self, cluster: Cluster, fleet: dict):
    """Create one managed node group per private subnet for the fleet.

    To correctly scale the cluster we need our node groups to not span
    across AZs (to avoid the automatic AZ re-balance), hence one node
    group per subnet.

    :param cluster: target EKS cluster
    :param fleet: fleet configuration dict from the environment config
    """
    fleet_name = fleet.get('name')
    autoscaling = fleet.get('autoscaling', {})
    for index, subnet in enumerate(cluster.vpc.private_subnets):
        cluster.add_nodegroup(
            id=f'{fleet_name}-{index}',
            instance_type=InstanceType(fleet.get('instanceType')),
            min_size=autoscaling.get('minInstances'),
            max_size=autoscaling.get('maxInstances'),
            labels=dict(**fleet.get('nodeLabels', {}), fleetName=fleet_name),
            nodegroup_name=f'{fleet_name}-{subnet.availability_zone}',
            subnets=SubnetSelection(subnets=[subnet]),
        )
def add_asg_fleet(self, scope: BaseApp, cluster: Cluster, fleet) -> List[AutoScalingGroup]:
    """Create one self-managed AutoScalingGroup per private subnet.

    ASGs are pinned to a single subnet/AZ so scaling stays under the
    cluster-autoscaler's control (a multi-AZ ASG would trigger the AZ
    re-balance).

    :param scope: application scope used for name prefixing
    :param cluster: target EKS cluster
    :param fleet: fleet configuration dict from the environment config
    :return: the created AutoScalingGroups
    """
    created_fleets: List[AutoScalingGroup] = []

    labels = fleet.get('nodeLabels', {})
    labels["fleetName"] = fleet.get('name')
    labels_as_str = ','.join(map('='.join, labels.items()))

    # Source of tweaks: https://kubedex.com/90-days-of-aws-eks-in-production/
    kubelet_extra_args = ' '.join([
        # Add node labels
        f'--node-labels {labels_as_str}' if len(labels_as_str) else '',
        # Capture resource reservation for kubernetes system daemons like the kubelet, container runtime,
        # node problem detector, etc.
        '--kube-reserved cpu=250m,memory=1Gi,ephemeral-storage=1Gi',
        # Capture resources for vital system functions, such as sshd, udev.
        '--system-reserved cpu=250m,memory=0.2Gi,ephemeral-storage=1Gi',
        # Start evicting pods from this node once these thresholds are crossed.
        '--eviction-hard memory.available<0.2Gi,nodefs.available<10%',
    ])

    # NOTE(review): cluster_sg is not referenced again in this method;
    # the construct is kept because instantiating it registers it in the
    # construct tree — confirm whether it is still required.
    cluster_sg = SecurityGroup.from_security_group_id(
        self,
        'eks-cluster-sg',
        security_group_id=cluster.cluster_security_group_id)

    # Tags that let the cluster-autoscaler auto-discover these ASGs
    asg_tags = {
        "k8s.io/cluster-autoscaler/enabled": "true",
        f"k8s.io/cluster-autoscaler/{cluster.cluster_name}": "owned",
    }

    # For correctly autoscaling the cluster we need our autoscaling groups
    # to not span across AZs to avoid the AZ Rebalance, hence we create an
    # ASG per subnet
    for index, subnet in enumerate(cluster.vpc.private_subnets):
        autoscaling = fleet.get('autoscaling', {})
        spot_price = fleet.get('spotPrice')
        asg: AutoScalingGroup = cluster.add_capacity(
            id=scope.prefixed_str(f'{fleet.get("name")}-{index}'),
            instance_type=InstanceType(fleet.get('instanceType')),
            min_capacity=autoscaling.get('minInstances'),
            max_capacity=autoscaling.get('maxInstances'),
            bootstrap_options=BootstrapOptions(
                kubelet_extra_args=kubelet_extra_args,
            ),
            spot_price=str(spot_price) if spot_price else None,
            vpc_subnets=SubnetSelection(subnets=[subnet]),
        )
        created_fleets.append(asg)
        self._add_userdata_production_tweaks(asg)
        for tag_key, tag_value in asg_tags.items():
            Tag.add(asg, tag_key, tag_value)

    return created_fleets
def add_to_cluster(cls, cluster: Cluster) -> None:
    """Deploy the kubernetes-external-secrets controller into the cluster.

    :param cluster: target EKS cluster
    :return: None
    """
    manifest = ManifestGenerator.namespace_resource('external-secrets')
    ns_name = manifest.get('metadata', {}).get('name')
    namespace = cluster.add_resource(
        f"{manifest.get('kind')}-{ns_name}", manifest)

    sa = cluster.add_service_account(
        'ExternalSecretsServiceAccount',
        name='external-secrets',
        namespace=ns_name,
    )
    sa.node.add_dependency(namespace)
    # The controller needs IAM access to read the backing secret stores
    cls.attach_iam_policies_to_role(sa.role)

    chart_values = {
        "customResourceManagerDisabled": True,
        "env": {
            "AWS_REGION": cluster.vpc.stack.region,
        },
        "rbac": {
            "create": True,
            "serviceAccount": {
                # Reuse the SA created above instead of letting Helm make one
                "name": sa.service_account_name,
                "create": False,
            },
        },
    }
    chart = cluster.add_chart(
        "helm-chart-external-secrets",
        release="kubernetes-external-secrets",
        chart="kubernetes-external-secrets",
        namespace=sa.service_account_namespace,
        repository=cls.HELM_REPOSITORY,
        version="4.0.0",
        values=chart_values,
    )
    chart.node.add_dependency(sa)
def _create_chart_release(
        cls,
        cluster: Cluster,
) -> None:
    """Install the loki-stack Helm chart into the cluster.

    :param cluster: target EKS cluster
    :return: None
    """
    # The chart handle was previously bound to an unused local; no
    # dependency is wired on it, so the assignment is dropped.
    cluster.add_chart(
        "helm-chart-loki",
        release="loki",
        chart="loki-stack",
        namespace="loki",
        repository=cls.HELM_REPOSITORY,
        version="0.38.2",
        values=None,
    )
def _create_chart_release(
        cls,
        cluster: Cluster,
) -> None:
    """Install the fluentd Helm chart into its own namespace.

    :param cluster: target EKS cluster
    :return: None
    """
    chart_values = {
        "aggregator": {
            "replicaCount": 1,
        },
        "serviceAccount": {
            "create": True,
        },
        "metrics": {
            "enabled": True,
        },
    }
    cluster.add_chart(
        "helm-chart-fluentd",
        release="fluentd",
        chart="fluentd",
        namespace="fluentd",
        repository=cls.HELM_REPOSITORY,
        version="1.2.7",
        values=chart_values,
    )
def add_to_cluster(cls, cluster: Cluster, env_domain: str = 'example.com') -> None:
    """
    Deploys Grafana into the EKS cluster (fixes a docstring copy-pasted
    from the external-secrets deployer).

    :param cluster: target EKS cluster
    :param env_domain: environment domain used to build the Grafana hostname
    :return: None
    """
    namespace = "grafana"
    resource = ManifestGenerator.namespace_resource(namespace)
    ns = cluster.add_resource(
        f"{resource.get('kind')}-{resource.get('metadata', {}).get('name')}",
        resource)

    sa = cluster.add_service_account(
        'grafana',
        name='grafana',
        namespace=resource.get('metadata', {}).get('name'),
    )
    # The SA can only be created once its namespace exists
    sa.node.add_dependency(ns)

    cls._create_chart_release(cluster, sa, env_domain)
def add_to_cluster(cls, cluster: Cluster, zone_type: ZoneType) -> None:
    """
    Deploys external-dns into the EKS cluster for the given zone type
    (fixes a docstring copy-pasted from the external-secrets deployer).

    One deployment per zone type, each in its own namespace with its own
    service account.

    :param cluster: target EKS cluster
    :param zone_type: Route53 zone flavour this deployment manages
    :return: None
    """
    namespace = f"external-dns-{zone_type.value}"
    resource = ManifestGenerator.namespace_resource(namespace)
    ns = cluster.add_resource(
        f"{resource.get('kind')}-{resource.get('metadata', {}).get('name')}",
        resource)

    sa = cluster.add_service_account(
        f'externalDnsServiceAccount-{zone_type.value}',
        name=f'external-dns-{zone_type.value}',
        namespace=resource.get('metadata', {}).get('name'),
    )
    sa.node.add_dependency(ns)
    # external-dns needs IAM access to manage Route53 records
    cls.attach_iam_policies_to_role(sa.role)

    cls._create_chart_release(cluster, sa, zone_type)
def _create_chart_release(
        cls,
        cluster: Cluster,
        service_account: ServiceAccount,
        env_domain: str,
) -> None:
    """Install the Grafana Helm chart, exposed via an istio ingress.

    :param cluster: target EKS cluster
    :param service_account: pre-created service account Grafana runs under
    :param env_domain: domain suffix for the Grafana hostname
    :return: None
    """
    chart = cluster.add_chart(
        "helm-chart-grafana",
        release="grafana",
        chart="grafana",
        namespace=service_account.service_account_namespace,
        repository=cls.HELM_REPOSITORY,
        version="3.1.1",
        values={
            "serviceAccount": {
                "create": False,
                "name": service_account.service_account_name,
            },
            "ingress": {
                "enabled": True,
                "annotations": {
                    "kubernetes.io/ingress.class": "istio",
                    "external-dns-route53-public": "true",
                },
                "hosts": [
                    {
                        "name": f"grafana.{env_domain}",
                        # Note: the default implementation uses a name for
                        # `servicePort`, which is not supported by istio 1.6,
                        # hence we create an additional path until istio
                        # supports port names.
                        # BUG FIX: this note used to be a bare triple-quoted
                        # string that implicitly concatenated with the
                        # "extraPaths" key below, silently corrupting the key
                        # so the extra path was never applied.
                        "extraPaths": [
                            {
                                "path": "/*",
                                "backend": {
                                    "serviceName": "grafana",
                                    "servicePort": 3000,
                                },
                            },
                        ]
                    },
                ],
            },
        },
    )
    chart.node.add_dependency(service_account)
def _create_chart_release(cls, cluster: Cluster,
                          service_account: ServiceAccount,
                          zone_type: ZoneType) -> None:
    """Install one external-dns Helm chart release for the given zone type.

    :param cluster: target EKS cluster
    :param service_account: pre-created service account external-dns runs under
    :param zone_type: Route53 zone flavour this release manages
    :return: None
    """
    chart_values = {
        "aws": {
            "region": cluster.vpc.stack.region,
            "zoneType": zone_type.value,
            # "zoneTags": [
            #     f"external-dns-route53-zone={zone_id}",
            # ],
        },
        "policy": "sync",
        "serviceAccount": {
            "name": service_account.service_account_name,
            "create": False,
        },
        "sources": [
            'service',
            'ingress',
            'istio-gateway',
            # 'istio-virtualservice',  # Soon to be released, keep an eye on releases
        ],
        # Records created by this release are owned via a TXT record per cluster
        "txtOwnerId": cluster.cluster_name,
        "rbac": {
            "create": True,
            "pspEnabled": True,
        },
        "replicas": 1,
        "metrics": {
            "enabled": True,
        },
        # Only act on resources annotated for this zone type
        "annotationFilter": f"external-dns-route53-{zone_type.value}=true",
    }
    chart = cluster.add_chart(
        f"helm-chart-external-dns-{zone_type.value}",
        release=f"ext-dns-{zone_type.value}",
        chart="external-dns",
        namespace=service_account.service_account_namespace,
        repository=cls.HELM_REPOSITORY,
        version="3.2.3",
        values=chart_values,
    )
    chart.node.add_dependency(service_account)
def _create_chart_release(
        cls,
        cluster: Cluster,
        operator_service_account: ServiceAccount,
        prometheus_service_account: ServiceAccount,
        alertmanager_service_account: ServiceAccount,
) -> None:
    """Install the prometheus-operator Helm chart.

    Each component (operator, prometheus, alertmanager) runs under its
    own pre-created service account.

    :param cluster: target EKS cluster
    :return: None
    """
    def existing_sa(sa: ServiceAccount) -> dict:
        # All three components share the same "use an existing SA" shape
        return {
            "serviceAccount": {
                "create": False,
                "name": sa.service_account_name,
            },
        }

    chart = cluster.add_chart(
        "helm-chart-prometheus",
        release="prometheus",
        chart="prometheus-operator",
        namespace=operator_service_account.service_account_namespace,
        repository=cls.HELM_REPOSITORY,
        version="0.22.3",
        values={
            "operator": existing_sa(operator_service_account),
            "prometheus": existing_sa(prometheus_service_account),
            "alertmanager": existing_sa(alertmanager_service_account),
        },
    )
    for sa in (operator_service_account, prometheus_service_account,
               alertmanager_service_account):
        chart.node.add_dependency(sa)
def add_to_cluster(cls, cluster: Cluster) -> None:
    """
    Deploys cert-manager into the EKS cluster

    Creates the namespace, one service account per cert-manager component
    (controller, cainjector, webhook), then installs the Helm chart wired
    to those service accounts.

    :param cluster: target EKS cluster
    :return: None
    """
    resource = ManifestGenerator.namespace_resource('cert-manager')
    namespace = cluster.add_resource(
        f"{resource.get('kind')}-{resource.get('metadata', {}).get('name')}",
        resource
    )

    sa = cluster.add_service_account(
        'CertManagerServiceAccount',
        name='cert-manager',
        namespace=resource.get('metadata', {}).get('name'),
    )
    sa.node.add_dependency(namespace)

    injector_sa = cluster.add_service_account(
        'CertManagerCAInjectorServiceAccount',
        name='cert-manager-ca-injector',
        namespace=resource.get('metadata', {}).get('name'),
    )
    injector_sa.node.add_dependency(namespace)

    webhook_sa = cluster.add_service_account(
        'CertManagerWebhookServiceAccount',
        name='cert-manager-webhook',
        namespace=resource.get('metadata', {}).get('name'),
    )
    webhook_sa.node.add_dependency(namespace)

    chart = cluster.add_chart(
        "helm-chart-cert-manager",
        release="cert-manager",
        chart="cert-manager",
        namespace="cert-manager",
        repository=cls.HELM_REPOSITORY,
        version="v0.15.2",
        values={
            "global": {
                "podSecurityPolicy": {
                    "enabled": True,
                },
            },
            "installCRDs": True,
            "serviceAccount": {
                "create": False,
                "name": sa.service_account_name,
            },
            "cainjector": {
                "serviceAccount": {
                    "create": False,
                    "name": injector_sa.service_account_name
                },
            },
            "webhook": {
                "serviceAccount": {
                    "create": False,
                    # BUG FIX: previously pointed at the cainjector's service
                    # account, leaving webhook_sa created but unused
                    "name": webhook_sa.service_account_name
                },
            },
        },
    )
    chart.node.add_dependency(sa)
    chart.node.add_dependency(injector_sa)
    chart.node.add_dependency(webhook_sa)
def __init__(self, scope: core.Construct, id: str, cluster: eks.Cluster, **kwargs) -> None:
    """Create per-namespace EKS config: namespace, service account, ECR
    repository, application ConfigMap, and an image-cleanup custom resource.

    Namespaces are read from the "kubernetes" CDK context. Exposes the
    created service-account roles on ``self.roles``.

    :param scope: parent construct
    :param id: construct id
    :param cluster: EKS cluster to configure
    """
    super().__init__(scope, id, **kwargs)

    config_maps = []
    self.roles = []
    namespaces = self.node.try_get_context("kubernetes")['namespaces']
    stack = core.Stack.of(self)

    # Allow the cleanup Lambda to list and delete images in every
    # per-namespace repository
    ecr_policy = iam.PolicyStatement(
        actions=[
            "ecr:DescribeImages",
            "ecr:ListImages",
            "ecr:BatchDeleteImage"
        ],
        effect=iam.Effect.ALLOW,
        resources=[
            f"arn:aws:ecr:{stack.region}:{stack.account}:repository/{ns}"
            for ns in namespaces
        ]
    )

    function = lbd.SingletonFunction(
        self,
        "ECRDeleteImagesFunction",
        uuid="19411b0e-0e80-4ad4-a316-3235940775e4",
        code=lbd.Code.from_asset(
            "custom_resources/kubernetes/"
        ),
        handler="config.handler",
        runtime=lbd.Runtime.PYTHON_3_7,
        function_name="kubernetesConfig",
        initial_policy=[ecr_policy],
        log_retention=logs.RetentionDays.ONE_DAY,
        timeout=core.Duration.seconds(30)
    )
    provider = cr.Provider(
        self,
        "ECRDeleteImagesFunctionProvider",
        on_event_handler=function,
        log_retention=logs.RetentionDays.ONE_DAY
    )

    repository_arns = []
    for ns in namespaces:
        manifest = cluster.add_manifest(
            f"eksConfigNamespace-{ns}",
            {
                "apiVersion": "v1",
                "kind": "Namespace",
                "metadata": {"name": ns}
            }
        )

        sa = cluster.add_service_account(
            f"service-account-{ns}",
            name="statement-demo",
            namespace=ns
        )
        # The SA can only exist once its namespace has been applied
        sa.node.add_dependency(manifest)
        self.roles.append(sa.role)

        repository = ecr.Repository(
            self,
            f"repository-{ns}",
            removal_policy=core.RemovalPolicy.DESTROY,
            repository_name=ns,
            lifecycle_rules=[ecr.LifecycleRule(max_image_count=1)]
        )
        repository_arns.append(repository.repository_arn)

        config_maps.append({
            "apiVersion": "v1",
            "kind": "ConfigMap",
            "metadata": {
                "name": "application.properties",
                "namespace": ns
            },
            "data": {
                # Synth-time read of the application's properties file
                "application-aws.properties": Path(
                    "../%s/src/main/resources/application-aws.properties" % ns
                ).read_text()
            }
        })

        # Custom resource that empties the repository on deletion
        core.CustomResource(
            self,
            f"ECRDeleteImagesFunction-{ns}",
            service_token=provider.service_token,
            properties={"repository": ns}
        ).node.add_dependency(repository)

    eks.KubernetesManifest(
        self,
        "eksConfigMaps",
        cluster=cluster,
        manifest=config_maps
    )

    iam.Policy(
        self,
        "saPolicy",
        force=True,
        policy_name="EKSSAPolicy",
        roles=self.roles,
        statements=[
            iam.PolicyStatement(
                actions=["cloudwatch:PutMetricData"],
                conditions={
                    "StringEquals": {
                        "cloudwatch:namespace": "statement12"
                    },
                },
                resources=["*"]
            )
        ]
    )