def create_task_container(self):
    """Add the "master" container to this stack's task definition.

    Configures a curl-based Docker health check, awslogs logging with a
    two-week retention, start/stop timeouts, and both the game (UDP) and
    health-check (TCP) port mappings.

    :return: the created ContainerDefinition.
    """
    healthcheck = ecs.HealthCheck(
        command=["CMD", "curl", "-f", "http://localhost:8080"])
    log_driver = ecs.LogDrivers.aws_logs(
        stream_prefix="master",
        log_retention=logs.RetentionDays.TWO_WEEKS,
    )
    master = self.task.add_container(
        "master",
        health_check=healthcheck,
        start_timeout=cdk.Duration.seconds(15),
        stop_timeout=cdk.Duration.seconds(15),
        image=self.define_container_image(),
        logging=log_driver,
        memory_reservation_mib=256,
    )
    # Expose the master port over UDP and the health-check port over TCP.
    for port, proto in ((self.master_port, ecs.Protocol.UDP),
                        (self.master_healthcheck_port, ecs.Protocol.TCP)):
        master.add_port_mappings(
            ecs.PortMapping(container_port=port, protocol=proto))
    return master
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    """Stack running the Janus server as a single public Fargate task.

    Builds the Janus image from the local ``janus-image`` directory,
    creates a public-subnet-only VPC (no NAT gateways, to keep costs
    down — tasks get public IPs instead), an ECS cluster, a Fargate task
    definition with an admin-endpoint health check, and a one-task
    service with a public IP.
    """
    super().__init__(scope, id, **kwargs)

    # Janus Image
    janus_asset = DockerImageAsset(self, "JanusBuildImage",
                                   directory=os.path.join(
                                       dirname, "janus-image"))

    # VPC — public subnets only; no NAT gateways are needed because the
    # service task is assigned a public IP directly.
    vpc = ec2.Vpc(self, "VPC",
                  nat_gateways=0,
                  subnet_configuration=[
                      ec2.SubnetConfiguration(
                          name="public", subnet_type=ec2.SubnetType.PUBLIC)
                  ])

    # Create an ECS cluster
    cluster = ecs.Cluster(self, "JanusCluster", vpc=vpc)

    # Task definition
    task_definition = ecs.FargateTaskDefinition(self, 'JanusTask')
    task_definition.add_container(
        "Janus",
        image=ecs.ContainerImage.from_docker_image_asset(janus_asset),
        cpu=256,
        memory_limit_mib=512,
        logging=ecs.LogDriver.aws_logs(
            stream_prefix='JanusTask',
            # Fix: use the documented enum member access instead of the
            # value-lookup call form RetentionDays("ONE_DAY"), which
            # depends on the enum's internal value representation.
            log_retention=RetentionDays.ONE_DAY),
        # The admin endpoint answers unauthenticated probes with an error
        # body, so grepping for "error" confirms the server is alive.
        health_check=ecs.HealthCheck(command=[
            "CMD-SHELL",
            "curl -fs http://localhost:7088/admin | grep error"
        ]))

    # Service
    ecs.FargateService(self, 'JanusService',
                       cluster=cluster,
                       task_definition=task_definition,
                       desired_count=1,
                       assign_public_ip=True)
def CreateSVC(self,ZachTaskList,ZachECSNodeName,cluster,vpc,AppendHostFile,ENV_VARS):
    """Create one EC2-backed ECS service per entry of ZachTaskList.

    For each (TaskName, TaskValue) pair this builds an EC2-compatible
    task definition with awsvpc networking, adds TaskValue["num"]
    (default 1) containers configured from TaskValue, maps the
    configured port, creates an Ec2Service with spread/binpack placement
    strategies, and emits CfnOutputs describing the created resources.

    :param ZachTaskList: mapping of task name -> config dict; recognised
        keys: "num" (replica count), "image" (registry image), "port".
    :param ZachECSNodeName: prefix for construct ids, hostnames and
        service names.
    :param cluster: ECS cluster the services are placed in.
    :param vpc: VPC passed to self.VPC_SG to build the security group.
    :param AppendHostFile: extra /etc/hosts entries for the containers.
    :param ENV_VARS: dict used both as container environment variables
        and as docker labels.
    """
    for TaskName, TaskValue in ZachTaskList.items():
        ZachTaskDef = ecs.TaskDefinition(self, id=ZachECSNodeName + "-" + TaskName,
                                         compatibility=ecs.Compatibility.EC2,
                                         network_mode=ecs.NetworkMode.AWS_VPC)
        core.CfnOutput(self, id=TaskName + "ARN", value=ZachTaskDef.task_definition_arn)
        for num in range(TaskValue.get("num", 1)):
            # NOTE(review): Docker/ECS health-check commands normally start
            # with "CMD" or "CMD-SHELL"; confirm ["ping 127.0.0.1"] is
            # accepted as intended by the pinned CDK/ECS version.
            container = ZachTaskDef.add_container(id=ZachECSNodeName + "-" + TaskName + str(num),
                                                  cpu=1,
                                                  memory_limit_mib=512,
                                                  memory_reservation_mib=256,
                                                  readonly_root_filesystem=True,
                                                  working_directory="/data/web",
                                                  user='******',
                                                  health_check=ecs.HealthCheck(command=["ping 127.0.0.1"],
                                                                               interval=core.Duration.seconds(30),
                                                                               retries=5,
                                                                               start_period=core.Duration.minutes(1),
                                                                               timeout=core.Duration.seconds(10)),
                                                  hostname=ZachECSNodeName + "-" + TaskName,
                                                  extra_hosts=AppendHostFile,
                                                  environment=ENV_VARS,
                                                  docker_labels=ENV_VARS,
                                                  image=ecs.ContainerImage.from_registry(TaskValue.get("image", "nginx:latest")),
                                                  logging=ecs.LogDrivers.fluentd())
            # Host port mirrors the container port (awsvpc networking).
            port_mapping = ecs.PortMapping(
                container_port=TaskValue.get("port", 80),
                host_port=TaskValue.get("port", 80),
                protocol=ecs.Protocol.TCP
            )
            container.add_port_mappings(port_mapping)
            core.CfnOutput(self, id=container.container_name + "ContainPort", value=str(container.container_port))
            core.CfnOutput(self, id=container.container_name + "MemLimit", value=str(container.memory_limit_specified))
            core.CfnOutput(self, id=container.container_name + "HostPort", value=str(port_mapping.host_port))
        # NOTE(review): assign_public_ip on an Ec2Service and desired_count=2
        # with per-task security groups — verify against the pinned CDK
        # version; health_check_grace_period is intentionally disabled below
        # because it requires a load balancer.
        svc = ecs.Ec2Service(self, id=ZachECSNodeName+TaskName,
                             task_definition=ZachTaskDef,
                             cluster=cluster,
                             desired_count=2,
                             security_group=self.VPC_SG(TaskName,vpc),
                             assign_public_ip=True,
                             # health_check_grace_period=core.Duration.seconds(30),  # Health check grace period is only valid for services configured to use load balancers
                             service_name=ZachECSNodeName+TaskName)
        # Spread tasks across instances first, then bin-pack on memory.
        svc.add_placement_strategies(ecs.PlacementStrategy.spread_across(ecs.BuiltInAttributes.INSTANCE_ID),
                                     ecs.PlacementStrategy.packed_by(ecs.BinPackResource.MEMORY))
        core.CfnOutput(self, id=ZachECSNodeName+TaskName + "ServiceName", value=svc.service_name)
        core.CfnOutput(self, id=ZachECSNodeName+TaskName + "ServiceARN", value=svc.service_arn)
        core.CfnOutput(self, id=ZachECSNodeName+TaskName + "ARN", value=cluster.cluster_arn)
        core.CfnOutput(self, id=ZachECSNodeName+TaskName + "VPCID", value=str(cluster.vpc.vpc_id))
        core.CfnOutput(self, id=ZachECSNodeName+TaskName + "VPCZone", value=str(cluster.vpc.availability_zones))
def appmesh(self):
    """Create the App Mesh control plane and its Envoy virtual-gateway service.

    Builds, in order: the mesh itself, a Virtual Gateway listening on
    HTTP 3000, a Fargate task definition running the AWS App Mesh Envoy
    image as the gateway proxy (with awslogs logging and an admin-port
    health check), an NLB-fronted Fargate service on port 80 registered
    in the cluster's Cloud Map namespace, the security-group / ulimit /
    IAM wiring Envoy needs, and CfnOutputs exporting the key ARNs/names.

    Requires ``self.ecs_cluster`` (with a default Cloud Map namespace)
    to already exist; sets ``self.mesh``, ``self.mesh_vgw``,
    ``self.mesh_gw_proxy_task_def``, ``self.logGroup`` and
    ``self.mesh_gateway_proxy_fargate_service``.
    """
    # This will create the app mesh (control plane)
    self.mesh = aws_appmesh.Mesh(self, "EcsWorkShop-AppMesh", mesh_name="ecs-mesh")
    # We will create a App Mesh Virtual Gateway
    self.mesh_vgw = aws_appmesh.VirtualGateway(
        self, "Mesh-VGW",
        mesh=self.mesh,
        listeners=[aws_appmesh.VirtualGatewayListener.http(port=3000)],
        virtual_gateway_name="ecsworkshop-vgw")
    # Creating the mesh gateway task for the frontend app
    # For more info related to App Mesh Proxy check https://docs.aws.amazon.com/app-mesh/latest/userguide/getting-started-ecs.html
    self.mesh_gw_proxy_task_def = aws_ecs.FargateTaskDefinition(
        self, "mesh-gw-proxy-taskdef",
        cpu=256,
        memory_limit_mib=512,
        family="mesh-gw-proxy-taskdef",
    )
    # LogGroup for the App Mesh Proxy Task
    self.logGroup = aws_logs.LogGroup(
        self, "ecsworkshopMeshGateway",
        #log_group_name="ecsworkshop-mesh-gateway",
        retention=aws_logs.RetentionDays.ONE_WEEK)
    # App Mesh Virtual Gateway Envoy proxy Task definition
    # For a use specific ECR region, please check https://docs.aws.amazon.com/app-mesh/latest/userguide/envoy.html
    container = self.mesh_gw_proxy_task_def.add_container(
        "mesh-gw-proxy-contdef",
        image=aws_ecs.ContainerImage.from_registry(
            "public.ecr.aws/appmesh/aws-appmesh-envoy:v1.18.3.0-prod"),
        container_name="envoy",
        memory_reservation_mib=256,
        environment={
            "REGION": getenv('AWS_DEFAULT_REGION'),
            "ENVOY_LOG_LEVEL": "info",
            "ENABLE_ENVOY_STATS_TAGS": "1",
            # "ENABLE_ENVOY_XRAY_TRACING": "1",
            "APPMESH_RESOURCE_ARN": self.mesh_vgw.virtual_gateway_arn
        },
        essential=True,
        logging=aws_ecs.LogDriver.aws_logs(stream_prefix='/mesh-gateway',
                                           log_group=self.logGroup),
        # Envoy's admin port reports the proxy's lifecycle state; LIVE
        # means it has finished bootstrapping.
        health_check=aws_ecs.HealthCheck(command=[
            "CMD-SHELL",
            "curl -s http://localhost:9901/server_info | grep state | grep -q LIVE"
        ], ))
    # Default port where frontend app is listening
    container.add_port_mappings(aws_ecs.PortMapping(container_port=3000))
    #ammmesh-xray-uncomment
    # xray_container = self.mesh_gw_proxy_task_def.add_container(
    #     "FrontendServiceXrayContdef",
    #     image=aws_ecs.ContainerImage.from_registry("amazon/aws-xray-daemon"),
    #     logging=aws_ecs.LogDriver.aws_logs(
    #         stream_prefix='/xray-container',
    #         log_group=self.logGroup
    #     ),
    #     essential=True,
    #     container_name="xray",
    #     memory_reservation_mib=256,
    #     user="******"
    # )
    # container.add_container_dependencies(aws_ecs.ContainerDependency(
    #     container=xray_container,
    #     condition=aws_ecs.ContainerDependencyCondition.START
    # )
    # )
    #ammmesh-xray-uncomment
    # For environment variables check https://docs.aws.amazon.com/app-mesh/latest/userguide/envoy-config.html
    self.mesh_gateway_proxy_fargate_service = aws_ecs_patterns.NetworkLoadBalancedFargateService(
        self, "MeshGW-Proxy-Fargate-Service",
        service_name='mesh-gw-proxy',
        cpu=256,
        memory_limit_mib=512,
        desired_count=1,
        listener_port=80,
        assign_public_ip=True,
        task_definition=self.mesh_gw_proxy_task_def,
        cluster=self.ecs_cluster,
        public_load_balancer=True,
        cloud_map_options=aws_ecs.CloudMapOptions(
            cloud_map_namespace=self.ecs_cluster.default_cloud_map_namespace,
            name='mesh-gw-proxy'))
    # For testing purposes we will open any ipv4 requests to port 3000
    self.mesh_gateway_proxy_fargate_service.service.connections.allow_from_any_ipv4(
        port_range=aws_ec2.Port(protocol=aws_ec2.Protocol.TCP,
                                string_representation="vtw_proxy",
                                from_port=3000,
                                to_port=3000),
        description="Allow NLB connections on port 3000")
    # Raise the open-file limit for Envoy (it holds many sockets).
    self.mesh_gw_proxy_task_def.default_container.add_ulimits(
        aws_ecs.Ulimit(hard_limit=15000,
                       name=aws_ecs.UlimitName.NOFILE,
                       soft_limit=15000))
    #Adding necessary policies for Envoy proxy to communicate with required services
    self.mesh_gw_proxy_task_def.execution_role.add_managed_policy(
        aws_iam.ManagedPolicy.from_aws_managed_policy_name(
            "AmazonEC2ContainerRegistryReadOnly"))
    self.mesh_gw_proxy_task_def.execution_role.add_managed_policy(
        aws_iam.ManagedPolicy.from_aws_managed_policy_name(
            "CloudWatchLogsFullAccess"))
    self.mesh_gw_proxy_task_def.task_role.add_managed_policy(
        aws_iam.ManagedPolicy.from_aws_managed_policy_name(
            "CloudWatchFullAccess"))
    # self.mesh_gw_proxy_task_def.task_role.add_managed_policy(aws_iam.ManagedPolicy.from_aws_managed_policy_name("AWSXRayDaemonWriteAccess"))
    self.mesh_gw_proxy_task_def.task_role.add_managed_policy(
        aws_iam.ManagedPolicy.from_aws_managed_policy_name(
            "AWSAppMeshEnvoyAccess"))
    self.mesh_gw_proxy_task_def.execution_role.add_to_policy(
        aws_iam.PolicyStatement(actions=['ec2:DescribeSubnets'],
                                resources=['*']))
    # Export the gateway/mesh identifiers for consumption by other stacks.
    core.CfnOutput(self, "MeshGwNlbDns",
                   value=self.mesh_gateway_proxy_fargate_service.load_balancer.load_balancer_dns_name,
                   export_name="MeshGwNlbDns")
    core.CfnOutput(self, "MeshArn",
                   value=self.mesh.mesh_arn,
                   export_name="MeshArn")
    core.CfnOutput(self, "MeshName",
                   value=self.mesh.mesh_name,
                   export_name="MeshName")
    core.CfnOutput(
        self, "MeshEnvoyServiceArn",
        value=self.mesh_gateway_proxy_fargate_service.service.service_arn,
        export_name="MeshEnvoyServiceArn")
    core.CfnOutput(self, "MeshVGWArn",
                   value=self.mesh_vgw.virtual_gateway_arn,
                   export_name="MeshVGWArn")
    core.CfnOutput(self, "MeshVGWName",
                   value=self.mesh_vgw.virtual_gateway_name,
                   export_name="MeshVGWName")
def configure_container(self, appname: str, props: Props, tgroups: {}):
    """Build Fargate task definitions, services and App Mesh virtual nodes
    for one app of the color-app demo (gateway, colorteller, or tcpecho).

    For 'colorteller' one task/service/virtual-node triple is created per
    color in ``props.colors``; gateway and tcpecho get a single one.
    Non-tcpecho tasks get the App Mesh proxy configuration plus X-Ray and
    Envoy sidecars; the gateway additionally gets Prometheus and Grafana
    containers and is attached to the 'grafana' target group.

    NOTE(review): this code targets an old CDK construct-library API
    (string cpu/memory, dict-mutated HealthCheck/ServiceDiscoveryOptions
    structs, ``ecs.AwsLogDriver(self, ...)``, private ``svc._load_balancers``
    / ``svc._cloudmap_service`` access) — verify against the pinned CDK
    version before upgrading.

    :param appname: 'gateway', 'colorteller' or 'tcpecho'.
    :param props: shared stack properties (cluster, mesh, repos, roles, ...).
    :param tgroups: ALB target groups by name; only 'grafana' is used.
    :return: dict mapping fullname -> created CfnVirtualNode.
    """
    virtnodes = {}
    # gateway/tcpecho are singletons; colorteller is replicated per color.
    if appname == 'gateway' or appname == 'tcpecho':
        colors = ['']
    else:
        colors = props.colors
    for color in colors:
        fullname = color.upper()+appname
        td = ecs.FargateTaskDefinition(self, fullname+'_task', cpu='256', memory_mi_b='512',
                                       execution_role=props.taskexeciamrole,
                                       task_role=props.taskiamrole)
        env = {}
        if appname != 'tcpecho':
            # Inject the App Mesh proxy configuration directly into the
            # CloudFormation resource (no first-class CDK support here).
            td.node.find_child('Resource').add_property_override('proxyConfiguration', {
                'type': 'APPMESH',
                'containerName': 'envoy',
                'proxyConfigurationProperties': [
                    {'name': 'IgnoredUID', 'value': '1337'},
                    {'name': 'ProxyIngressPort', 'value': '15000'},
                    {'name': 'ProxyEgressPort', 'value': '15001'},
                    {'name': 'AppPorts', 'value': '9080'},
                    {'name': 'EgressIgnoredIPs', 'value': '169.254.170.2,169.254.169.254'}
                ]
            })
            env = {
                'SERVER_PORT': '9080'
            }
        # gateway/colorteller images come from the project ECR repos;
        # tcpecho uses a public image.
        if appname != 'tcpecho':
            contimage = ecs.EcrImage.from_ecr_repository(props.repos[appname], tag='latest')
        else:
            contimage = ecs.ContainerImage.from_registry('cjimti/go-echo')
        port = 9080
        if appname == 'gateway':
            env['COLOR_TELLER_ENDPOINT'] = props.repos['colorteller'].repository_name +\
                '.'+props.cluster.default_namespace.namespace_name+':9080'
            env['TCP_ECHO_ENDPOINT'] = 'tcpecho.'+props.cluster.default_namespace.namespace_name+':2701'
        elif appname == 'colorteller':
            env['COLOR'] = color
        else:
            env = {'TCP_PORT': '2701',
                   'NODE_NAME': 'mesh/' + props.mesh.mesh_name + '/virtualNode/tcpecho--vn'}
            port = 2701
        cont = ecs.ContainerDefinition(self, fullname+'-container',
                                       task_definition=td,
                                       essential=True,
                                       logging=ecs.AwsLogDriver(self, fullname+'-logs', stream_prefix=fullname),
                                       image=contimage,
                                       environment=env)
        #cont.add_port_mappings(container_port=port, host_port=port, protocol=ecs.Protocol.Tcp)
        # X-Ray and Envoy definition ----------------------------------------------------------------------------
        if appname != 'tcpecho':
            xrayimage = ecs.ContainerImage.from_registry('amazon/aws-xray-daemon')
            xtask = td.add_container('xray-daemon', image=xrayimage, cpu=32,
                                     memory_reservation_mi_b=256,
                                     logging=ecs.AwsLogDriver(self, fullname+'-xray-logs', stream_prefix=fullname+'-xray'),
                                     essential=True,
                                     user='******')
            xtask.add_port_mappings(container_port=2000, host_port=2000, protocol=ecs.Protocol.Udp)
            # Envoy definition ----------------------------------------------------------------------------------
            ENVOY_IMAGE_LOC = '111345817488.dkr.ecr.us-west-2.amazonaws.com/aws-appmesh-envoy:v1.9.1.0-prod'
            envoyimage = ecs.EcrImage.from_registry(ENVOY_IMAGE_LOC)
            envoyenv = {
                'APPMESH_VIRTUAL_NODE_NAME': 'mesh/'+props.mesh.mesh_name+'/virtualNode/'+appname+'-'+color+'-vn',
                'ENABLE_ENVOY_XRAY_TRACING': '1',
                'ENABLE_ENVOY_STATS_TAGS': '1',
                'ENVOY_LOG_LEVEL': 'debug'
            }
            if appname == 'gateway':
                # gateway has no color, so its virtual-node name collapses
                # to the 'gateway--vn' form.
                envoyenv['APPMESH_VIRTUAL_NODE_NAME'] = 'mesh/'+props.mesh.mesh_name+'/virtualNode/gateway--vn'
            # Dict-style struct mutation (legacy CDK API); interval/timeout
            # are presumably seconds here — confirm for the pinned version.
            envoy_hc = ecs.HealthCheck()
            envoy_hc['command'] = ['CMD-SHELL', 'curl -s http://localhost:9901/server_info | grep state | grep -q LIVE']
            envoy_hc['interval'] = 5
            envoy_hc['timeout'] = 2
            envoy_hc['retries'] = 3
            etask = td.add_container('envoy',
                                     image=envoyimage,
                                     user='******',
                                     essential=True,
                                     environment=envoyenv,
                                     logging=ecs.AwsLogDriver(self, fullname+'-envoy-logs', stream_prefix=fullname+'-envoy'),
                                     health_check=envoy_hc)
            etask.add_port_mappings(container_port=9901, host_port=9901, protocol=ecs.Protocol.Tcp)
            etask.add_port_mappings(container_port=15000, host_port=15000, protocol=ecs.Protocol.Tcp)
            etask.add_port_mappings(container_port=15001, host_port=15001, protocol=ecs.Protocol.Tcp)
        # Prometheus & Grafana definition for Gateway ---------------------------------------------------------
        if appname == 'gateway':
            prometheusimage = ecs.EcrImage.from_ecr_repository(props.repos['prometheus'], tag='latest')
            ptask = td.add_container('prometheus', image=prometheusimage, essential=True,
                                     logging=ecs.AwsLogDriver(self, appname + '-prometheus-logs',
                                                              stream_prefix=appname + '-prometheus'))
            ptask.add_port_mappings(container_port=9090, host_port=9090)
            grafanaimage = ecs.ContainerImage.from_registry('grafana/grafana:latest')
            gtask = td.add_container('grafana', image=grafanaimage, essential=True,
                                     logging=ecs.AwsLogDriver(self, appname + '-grafana-logs',
                                                              stream_prefix=appname + '-grafana'))
            gtask.add_port_mappings(container_port=3000, host_port=3000)
        # Cloud Map discovery name: 'white' doubles as the plain
        # 'colorteller' record so the DNS name always resolves.
        disco = ecs.ServiceDiscoveryOptions()
        disco['dnsRecordType'] = sdisc.DnsRecordType.A
        disco['dnsTtlSec'] = 3000
        if color == 'white':
            disco['name'] = 'colorteller'
        elif appname != 'gateway' and appname != 'tcpecho':
            disco['name'] = 'colorteller-'+color
        elif appname == 'gateway':
            disco['name'] = 'colorgateway'
        else:
            disco['name'] = 'tcpecho'
        svc = ecs.FargateService(self, fullname+'Service',
                                 maximum_percent=200,
                                 minimum_healthy_percent=100,
                                 desired_count=1,
                                 task_definition=td,
                                 cluster=props.cluster,
                                 vpc_subnets=props.vpc.private_subnets,
                                 security_group=props.csg,
                                 service_discovery_options=disco)
        if appname == 'gateway':
            # HACK: writes a private attribute to attach the grafana
            # container to the ALB target group — no public API was used.
            svc._load_balancers = [{'containerName': 'grafana', 'containerPort': 3000,
                                    'targetGroupArn': tgroups['grafana'].target_group_arn}]
        # App Mesh virtual-node spec (raw CloudFormation shape).
        path = '/ping' if appname != 'tcpecho' else '/'
        spec = {
            'listeners': [{
                'portMapping': {'port': port, 'protocol': 'http'},
                'healthCheck': {'protocol': 'http', 'path': path, 'healthyThreshold': 2,
                                'unhealthyThreshold': 2, 'timeoutMillis': 2000, 'intervalMillis': 5000}}],
            'serviceDiscovery': {
                'dns': {'hostname': svc._cloudmap_service.service_name+'.'+
                        props.cluster.default_namespace.namespace_name}
            }
        }
        if appname == 'gateway':
            spec['backends'] = [
                {'virtualService': {'virtualServiceName': 'colorteller'+'.'+props.cluster.default_namespace.namespace_name}},
                {'virtualService': {'virtualServiceName': 'tcpecho' + '.'
                                    + props.cluster.default_namespace.namespace_name}},
            ]
        # Create AppMesh virtual nodes ------------------------------------------------------------------------
        vn = appmesh.CfnVirtualNode(self, fullname + 'VirtualNode',
                                    mesh_name=props.mesh.mesh_name,
                                    virtual_node_name=appname + '-' + color + '-vn',
                                    spec=spec)
        virtnodes[fullname] = vn
    return virtnodes
def __init__(self, scope: core.Construct, construct_id: str, *, secrets: List[Secret]):
    """Single-instance ECS stack for the duckbot Discord bot.

    Creates a public-only VPC, an encrypted EFS file system used as the
    postgres data volume, an EC2-compatible bridge-network task with a
    postgres container plus the duckbot container (linked, with SSM
    SecureString parameters injected as ECS secrets), a one-instance
    t2.micro auto-scaling group providing cluster capacity, and the
    Ec2Service tying it together.

    :param secrets: objects exposing ``environment_name`` (env var to set)
        and ``parameter_name`` (SSM parameter to read it from).
    """
    super().__init__(scope, construct_id)
    # Public subnets only; no NAT gateways keeps the stack on the free tier.
    vpc = aws_ec2.Vpc(
        self, "Vpc",
        enable_dns_support=True,
        enable_dns_hostnames=True,
        max_azs=3,
        nat_gateways=0,
        subnet_configuration=[
            aws_ec2.SubnetConfiguration(
                name="Public", subnet_type=aws_ec2.SubnetType.PUBLIC)
        ],
    )
    postgres_volume_name = "duckbot_dbdata"
    file_system = aws_efs.FileSystem(
        self, "PostgresFileSystem",
        vpc=vpc,
        encrypted=True,
        file_system_name=postgres_volume_name,
        removal_policy=core.RemovalPolicy.DESTROY)
    file_system.node.default_child.override_logical_id(
        "FileSystem"
    )  # rename for compatibility with legacy cloudformation template
    task_definition = aws_ecs.TaskDefinition(
        self, "TaskDefinition",
        compatibility=aws_ecs.Compatibility.EC2,
        family="duckbot",
        memory_mib="960",
        network_mode=aws_ecs.NetworkMode.BRIDGE)
    postgres_data_path = "/data/postgres"
    # Postgres is non-essential: if it stops, duckbot keeps running.
    postgres = task_definition.add_container(
        "postgres",
        container_name="postgres",
        image=aws_ecs.ContainerImage.from_registry("postgres:13.2"),
        essential=False,
        environment={
            "POSTGRES_USER": "******",
            "POSTGRES_PASSWORD": "******",
            "PGDATA": postgres_data_path,
        },
        health_check=aws_ecs.HealthCheck(
            command=["CMD", "pg_isready", "-U", "duckbot"],
            interval=core.Duration.seconds(30),
            timeout=core.Duration.seconds(5),
            retries=3,
            start_period=core.Duration.seconds(30),
        ),
        logging=aws_ecs.LogDriver.aws_logs(
            stream_prefix="ecs",
            log_retention=aws_logs.RetentionDays.ONE_MONTH),
        memory_reservation_mib=128,
    )
    # Persist postgres data on EFS so it survives task restarts.
    task_definition.add_volume(
        name=postgres_volume_name,
        efs_volume_configuration=aws_ecs.EfsVolumeConfiguration(
            file_system_id=file_system.file_system_id,
            root_directory="/"))
    postgres.add_mount_points(
        aws_ecs.MountPoint(source_volume=postgres_volume_name,
                           container_path=postgres_data_path,
                           read_only=False))
    secrets_as_parameters = {
        # note, parameter version is required by cdk, but does not make it into the template; specify version 1 for simplicity
        x.environment_name:
        aws_ssm.StringParameter.from_secure_string_parameter_attributes(
            self, x.environment_name, parameter_name=x.parameter_name, version=1)
        for x in secrets
    }
    duckbot = task_definition.add_container(
        "duckbot",
        container_name="duckbot",
        essential=True,
        image=aws_ecs.ContainerImage.from_registry(
            self.node.try_get_context("duckbot_image")),
        environment={"STAGE": "prod"},
        secrets={
            k: aws_ecs.Secret.from_ssm_parameter(v)
            for k, v in secrets_as_parameters.items()
        },
        health_check=aws_ecs.HealthCheck(
            command=["CMD", "python", "-m", "duckbot.health"],
            interval=core.Duration.seconds(30),
            timeout=core.Duration.seconds(10),
            retries=3,
            start_period=core.Duration.seconds(30),
        ),
        logging=aws_ecs.LogDriver.aws_logs(
            stream_prefix="ecs",
            log_retention=aws_logs.RetentionDays.ONE_MONTH),
        memory_reservation_mib=128,
    )
    # Legacy docker link so duckbot can reach postgres under bridge networking.
    duckbot.add_link(postgres)
    asg = aws_autoscaling.AutoScalingGroup(
        self, "AutoScalingGroup",
        min_capacity=0,
        max_capacity=1,
        desired_capacity=1,
        machine_image=aws_ecs.EcsOptimizedImage.amazon_linux2(),
        instance_type=aws_ec2.InstanceType("t2.micro"),
        key_name="duckbot",  # needs to be created manually
        instance_monitoring=aws_autoscaling.Monitoring.BASIC,
        vpc=vpc,
    )
    # Instance needs EFS access plus inbound SSH/HTTP/HTTPS.
    asg.connections.allow_to_default_port(file_system)
    asg.connections.allow_from(aws_ec2.Peer.any_ipv4(), aws_ec2.Port.tcp(22))
    asg.connections.allow_from(aws_ec2.Peer.any_ipv4(), aws_ec2.Port.tcp(80))
    asg.connections.allow_from(aws_ec2.Peer.any_ipv4(), aws_ec2.Port.tcp(443))
    cluster = aws_ecs.Cluster(self, "Cluster", cluster_name="duckbot", vpc=vpc)
    cluster.add_asg_capacity_provider(
        aws_ecs.AsgCapacityProvider(cluster, "AsgCapacityProvider",
                                    auto_scaling_group=asg),
        can_containers_access_instance_role=True)
    # min 0% healthy lets the single t2.micro replace the task in place.
    aws_ecs.Ec2Service(
        self, "Service",
        service_name="duckbot",
        cluster=cluster,
        task_definition=task_definition,
        desired_count=1,
        min_healthy_percent=0,
        max_healthy_percent=100,
    )
def __init__(self, scope, id, vpc, **kwarg) -> None:
    """Color-app (App Mesh demo) stack.

    Creates an ECS cluster with a ``svc.test.local`` Cloud Map namespace,
    IAM roles, Fargate task definitions for a gateway, four colorteller
    variants (black/blue/red/white) each with an Envoy sidecar and X-Ray
    daemon, and a tcpecho service; then one Fargate service per task with
    VPC-local ingress, CPU autoscaling, and an internet-facing ALB
    fronting the gateway and echo services.

    Fix over the previous revision: the blue colorteller shipped with
    ``COLOR=black``; each teller now reports its own color.
    """
    super().__init__(scope, id, **kwarg)

    # cluster + service discovery
    cluster = aws_ecs.Cluster(self, 'fargate-service-autoscaling', vpc=vpc)
    sd_namespace = cluster.add_default_cloud_map_namespace(
        name="svc.test.local", vpc=vpc)
    aws_servicediscovery.Service(self, "svc.test.local",
                                 namespace=sd_namespace,
                                 load_balancer=True)

    # ECS roles: execution role pulls images / deploys; task role lets the
    # sidecars talk to App Mesh, CloudWatch and X-Ray.
    ecs_principle = aws_iam.ServicePrincipal('ecs-tasks.amazonaws.com')
    execution_role = aws_iam.Role(self, 'execution-role',
                                  assumed_by=ecs_principle)
    execution_role.add_managed_policy(
        policy=aws_iam.ManagedPolicy.from_aws_managed_policy_name(
            managed_policy_name="AWSCodeDeployRoleForECS"))
    execution_role.add_managed_policy(
        policy=aws_iam.ManagedPolicy.from_aws_managed_policy_name(
            managed_policy_name="AmazonEC2ContainerRegistryReadOnly"))
    task_role = aws_iam.Role(self, 'task-role', assumed_by=ecs_principle)
    task_role.add_managed_policy(
        aws_iam.ManagedPolicy.from_aws_managed_policy_name(
            managed_policy_name="AWSAppMeshEnvoyAccess"))
    task_role.add_managed_policy(
        aws_iam.ManagedPolicy.from_aws_managed_policy_name(
            managed_policy_name="CloudWatchFullAccess"))
    task_role.add_managed_policy(
        aws_iam.ManagedPolicy.from_aws_managed_policy_name(
            managed_policy_name="AWSXRayDaemonWriteAccess"))

    # Envoy image lives in the AWS-owned App Mesh ECR account.
    envoy_ecr = aws_ecr.Repository.from_repository_attributes(
        self, 'aws-envoy',
        repository_arn=core.Stack.of(self).format_arn(
            service="ecr",
            resource="aws-appmesh-envoy",
            account="840364872350"),
        repository_name="aws-appmesh-envoy")

    # app images built from local sources
    gateway_image = aws_ecs.ContainerImage.from_asset("./src/gateway")
    colorteller_image = aws_ecs.ContainerImage.from_asset(
        "./src/colorteller")

    # one shared log group, a stream prefix per container family
    log_group = aws_logs.LogGroup(self, "/ecs/colorteller",
                                  retention=aws_logs.RetentionDays.ONE_DAY)

    def make_logs(prefix):
        # Log driver bound to the shared group under the given prefix.
        return aws_ecs.LogDriver.aws_logs(log_group=log_group,
                                          stream_prefix=prefix)

    gateway_ecs_logs = make_logs("gateway")
    black_ecs_logs = make_logs("black")
    blue_ecs_logs = make_logs("blue")
    red_ecs_logs = make_logs("red")
    white_ecs_logs = make_logs("white")
    tcpecho_ecs_logs = make_logs("tcpecho")

    # Mesh properties setup
    mesh_properties = aws_ecs.AppMeshProxyConfigurationProps(
        app_ports=[9080],
        proxy_egress_port=15001,
        proxy_ingress_port=15000,
        egress_ignored_i_ps=["169.254.170.2", "169.254.169.254"],
        ignored_uid=1337)

    # envoy ulimit defaults (Envoy holds many sockets)
    envoy_ulimit = aws_ecs.Ulimit(hard_limit=15000,
                                  name=aws_ecs.UlimitName.NOFILE,
                                  soft_limit=15000)

    def add_envoy_container(task_def, logs_driver, node_name, log_level):
        """Add the App Mesh Envoy sidecar (health check, admin/proxy ports,
        raised NOFILE ulimit) to *task_def* and return it."""
        envoy = task_def.add_container(
            "envoy",
            logging=logs_driver,
            environment={
                "ENVOY_LOG_LEVEL": log_level,
                "ENABLE_ENVOY_XRAY_TRACING": "1",
                "ENABLE_ENVOY_STATS_TAGS": "1",
                "APPMESH_VIRTUAL_NODE_NAME":
                    "mesh/ColorTellerAppMesh/virtualNode/" + node_name,
                "APPMESH_XDS_ENDPOINT": ""
            },
            image=aws_ecs.ContainerImage.from_ecr_repository(
                repository=envoy_ecr, tag="v1.12.1.1-prod"),
            essential=True,
            user="******",
            health_check=aws_ecs.HealthCheck(command=[
                "CMD-SHELL",
                "curl -s http://localhost:9901/ready |grep -q LIVE"
            ]))
        envoy.add_port_mappings(
            aws_ecs.PortMapping(container_port=9901,
                                protocol=aws_ecs.Protocol.TCP),
            aws_ecs.PortMapping(container_port=15000,
                                protocol=aws_ecs.Protocol.TCP),
            aws_ecs.PortMapping(container_port=15001,
                                protocol=aws_ecs.Protocol.TCP),
        )
        envoy.add_ulimits(envoy_ulimit)
        return envoy

    def add_xray_container(task_def, logs_driver):
        """Add the X-Ray daemon sidecar (UDP 2000) to *task_def*."""
        task_def.add_container(
            "xray",
            logging=logs_driver,
            image=aws_ecs.ContainerImage.from_registry(
                "amazon/aws-xray-daemon")).add_port_mappings(
                    aws_ecs.PortMapping(container_port=2000,
                                        protocol=aws_ecs.Protocol.UDP))

    # gateway task: app container + x-ray + envoy (debug-level logs)
    gateway_task_def = aws_ecs.FargateTaskDefinition(
        self, "gateway_task",
        cpu=256,
        memory_limit_mib=512,
        execution_role=execution_role,
        task_role=task_role,
        proxy_configuration=aws_ecs.AppMeshProxyConfiguration(
            container_name="envoy", properties=mesh_properties))
    gateway_task_def.add_container(
        "gateway",
        logging=gateway_ecs_logs,
        environment={
            "SERVER_PORT": "9080",
            "STAGE": "v1.1",
            "COLOR_TELLER_ENDPOINT": "colorteller.svc.test.local:9080",
            "TCP_ECHO_ENDPOINT": "tcpecho.svc.test.local:2701"
        },
        image=gateway_image).add_port_mappings(
            aws_ecs.PortMapping(container_port=9080,
                                protocol=aws_ecs.Protocol.TCP))
    add_xray_container(gateway_task_def, gateway_ecs_logs)
    add_envoy_container(gateway_task_def, gateway_ecs_logs,
                        "gateway", "debug")

    def make_colorteller_task(color, family, stage, logs_driver):
        """Task definition for one colorteller variant: Envoy sidecar (must
        be HEALTHY before the app starts), the colorteller app, X-Ray."""
        task_def = aws_ecs.FargateTaskDefinition(
            self, color + "-task",
            cpu=256,
            family=family,
            memory_limit_mib=512,
            execution_role=execution_role,
            task_role=task_role,
            proxy_configuration=aws_ecs.AppMeshProxyConfiguration(
                container_name="envoy", properties=mesh_properties))
        envoy = add_envoy_container(task_def, logs_driver, color, "info")
        app = task_def.add_container(
            color,
            logging=logs_driver,
            environment={
                # bug fix: each teller reports its own color (the blue
                # task previously shipped COLOR=black)
                "COLOR": color,
                "SERVER_PORT": "9080",
                "STAGE": stage
            },
            image=colorteller_image)
        app.add_port_mappings(
            aws_ecs.PortMapping(container_port=9080,
                                protocol=aws_ecs.Protocol.TCP))
        app.add_container_dependencies(
            aws_ecs.ContainerDependency(
                container=envoy,
                condition=aws_ecs.ContainerDependencyCondition.HEALTHY))
        add_xray_container(task_def, logs_driver)
        return task_def

    black_task_def = make_colorteller_task("black", "black", "v1.1",
                                           black_ecs_logs)
    blue_task_def = make_colorteller_task("blue", "blue", "v1.1",
                                          blue_ecs_logs)
    # family "red-task" kept as-is for compatibility with the previously
    # deployed task-definition family name; red is the v1.2 canary stage.
    red_task_def = make_colorteller_task("red", "red-task", "v1.2",
                                         red_ecs_logs)
    white_task_def = make_colorteller_task("white", "white", "v1.1",
                                           white_ecs_logs)

    # tcpecho service (external docker image, no mesh proxy/sidecars)
    tcpecho_task_def = aws_ecs.FargateTaskDefinition(
        self, 'tcpecho-tasks',
        cpu=256,
        family="tcpecho",
        memory_limit_mib=512,
        execution_role=execution_role,
        task_role=task_role)
    tcpecho_task_def.add_container(
        "tcpecho",
        logging=tcpecho_ecs_logs,
        environment={
            "TCP_PORT": "2701",
            "NODE_NAME": "mesh/ColorTellerAppMesh/virtualNode/echo"
        },
        image=aws_ecs.ContainerImage.from_registry("cjimti/go-echo"),
        essential=True,
    ).add_port_mappings(
        aws_ecs.PortMapping(container_port=2701,
                            protocol=aws_ecs.Protocol.TCP))

    def make_service(name, task_def, discovery_name, ingress_port):
        """Two-task Fargate service registered in Cloud Map, with an
        ingress rule allowing *ingress_port* from inside the VPC."""
        service = aws_ecs.FargateService(
            self, name,
            cluster=cluster,
            task_definition=task_def,
            desired_count=2,
            cloud_map_options=aws_ecs.CloudMapOptions(
                cloud_map_namespace=sd_namespace, name=discovery_name))
        service.connections.security_groups[0].add_ingress_rule(
            peer=aws_ec2.Peer.ipv4(vpc.vpc_cidr_block),
            connection=aws_ec2.Port.tcp(ingress_port),
            description="Allow http inbound from VPC")
        return service

    gateway_fargate_service = make_service(
        "gateway", gateway_task_def, "gateway", 9080)
    black_colorteller_fargate_service = make_service(
        "black", black_task_def, "black", 9080)
    blue_colorteller_fargate_service = make_service(
        "blue", blue_task_def, "blue", 9080)
    red_colorteller_fargate_service = make_service(
        "red", red_task_def, "red", 9080)
    # white doubles as the plain "colorteller" record so the gateway's
    # colorteller endpoint always resolves.
    white_colorteller_fargate_service = make_service(
        "white", white_task_def, "colorteller", 9080)
    echo_fargate_service = make_service(
        "tcpecho", tcpecho_task_def, "tcpecho", 2701)

    # adds autoscaling policies to all services
    for service in [
            black_colorteller_fargate_service,
            blue_colorteller_fargate_service,
            red_colorteller_fargate_service,
            white_colorteller_fargate_service, gateway_fargate_service,
            echo_fargate_service
    ]:
        try:
            scaling = service.service.auto_scale_task_count(max_capacity=2)
        except AttributeError:
            scaling = service.auto_scale_task_count(max_capacity=2)
        scaling.scale_on_cpu_utilization(
            "CpuScaling",
            target_utilization_percent=50,
            scale_in_cooldown=core.Duration.seconds(60),
            scale_out_cooldown=core.Duration.seconds(60),
        )

    # configure loadbalancer to listen on port 80 and add targets to
    # gateway and echo apps
    load_balancer = aws_elasticloadbalancingv2.ApplicationLoadBalancer(
        self, "lb", vpc=vpc, internet_facing=True)
    listener = load_balancer.add_listener("PublicListener",
                                          port=80,
                                          open=True)
    health_check = aws_elasticloadbalancingv2.HealthCheck(
        interval=core.Duration.seconds(60),
        path="/ping",
        port="9080",
        timeout=core.Duration.seconds(5))
    # attach ALB to ECS service
    listener.add_targets(
        "gateway",
        port=80,
        targets=[gateway_fargate_service, echo_fargate_service],
        health_check=health_check,
    )

    # outputs of ALB and cluster
    core.CfnOutput(self, "LoadBalancerDNS",
                   value=load_balancer.load_balancer_dns_name)
    core.CfnOutput(self, "ClusterName", value=cluster.cluster_name)
def CreateSVC(self, ZachTaskList, ZachECSNodeName, cluster, vpc, AppendHostFile, ENV_VARS, choice="ELB"):
    """Create an EC2-compatible task definition, its containers and a
    load-balanced ECS service for every task listed in ``ZachTaskList``.

    Args:
        ZachTaskList: mapping of task name -> settings dict. Recognised keys:
            ``num`` (container count, default 1), ``image`` (default
            ``"nginx:latest"``) and ``port`` (default 80).
        ZachECSNodeName: prefix used for construct ids and container hostnames.
        cluster: ECS cluster the services are placed on.
        vpc: VPC handed to the ELB-fronted service helper.
        AppendHostFile: extra /etc/hosts entries for each container.
        ENV_VARS: dict used both as the container environment and as docker labels.
        choice: ``"ELB"`` routes through ``self.ELB_SVC``; any other value
            through ``self.NetworkLBSVC``.
    """
    for TaskName, TaskValue in ZachTaskList.items():
        task_definition = ecs.TaskDefinition(
            self,
            id=ZachECSNodeName + "-" + TaskName,
            compatibility=ecs.Compatibility.EC2,
            network_mode=ecs.NetworkMode.AWS_VPC)
        core.CfnOutput(self,
                       id=TaskName + "-ARN",
                       value=task_definition.task_definition_arn)
        for num in range(TaskValue.get("num", 1)):
            container = task_definition.add_container(
                id=ZachECSNodeName + "-" + TaskName + str(num),
                cpu=1,
                memory_limit_mib=512,
                memory_reservation_mib=256,
                readonly_root_filesystem=True,
                working_directory="/data/web",
                user='******',
                # FIX: the original command ["ping 127.0.0.1"] was broken twice:
                # ECS health-check commands must start with "CMD"/"CMD-SHELL",
                # and `ping` without a count never exits, so every probe hit the
                # 10 s timeout and the container was flagged unhealthy. A single
                # ping in shell form terminates promptly.
                health_check=ecs.HealthCheck(
                    command=["CMD-SHELL", "ping -c 1 127.0.0.1 || exit 1"],
                    interval=core.Duration.seconds(30),
                    retries=5,
                    start_period=core.Duration.minutes(1),
                    timeout=core.Duration.seconds(10)),
                hostname=ZachECSNodeName + "-" + TaskName,
                extra_hosts=AppendHostFile,
                environment=ENV_VARS,
                docker_labels=ENV_VARS,
                image=ecs.ContainerImage.from_registry(
                    TaskValue.get("image", "nginx:latest")),
                logging=ecs.LogDrivers.fluentd())
            port_mapping = ecs.PortMapping(
                container_port=TaskValue.get("port", 80),
                host_port=TaskValue.get("port", 80),
                protocol=ecs.Protocol.TCP)
            container.add_port_mappings(port_mapping)
            core.CfnOutput(self,
                           id=container.container_name + "-ContainPort",
                           value=str(container.container_port))
            core.CfnOutput(self,
                           id=container.container_name + "-MemLimit",
                           value=str(container.memory_limit_specified))
            core.CfnOutput(self,
                           id=container.container_name + "-HostPort",
                           value=str(port_mapping.host_port))
        # Front the task with the requested load balancer flavour.
        if choice == "ELB":
            self.ELB_SVC(ZachECSNodeName, TaskName, task_definition, cluster, vpc)
        else:
            self.NetworkLBSVC(ZachECSNodeName, TaskName, cluster)
    # FIX: these cluster-level outputs have ids derived only from
    # ZachECSNodeName but were created once per task, so a task list with more
    # than one entry crashed with a duplicate-construct-id error. Emit them
    # exactly once, after the loop.
    core.CfnOutput(self, id=ZachECSNodeName + "-ARN", value=cluster.cluster_arn)
    core.CfnOutput(self, id=ZachECSNodeName + "-VPCID", value=str(cluster.vpc.vpc_id))
    core.CfnOutput(self,
                   id=ZachECSNodeName + "-VPCZone",
                   value=str(cluster.vpc.availability_zones))
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    """Canary-deployment demo stack.

    Wires together:
      * a Node.js ``myDateTimeFunction`` Lambda deployed through CodeDeploy
        with before/after-traffic hook Lambdas and alarm-driven rollback,
      * a Fargate web service running on an ECS cluster shared via SSM
        parameters,
      * one public Application Load Balancer routing "/" to the Lambda and
        "/container" to the Fargate service.
    """
    super().__init__(scope, id, **kwargs)
    # NOTE(review): `env` is read from kwargs here but never used below.
    env = kwargs['env']
    # Repository root (one level above this file's directory); the container
    # image further down is built from <root>/container.
    work_dir = pathlib.Path(__file__).parents[1]
    # The steps below reuse an ECS cluster already created by a shared stack:
    # every identifier is read back from SSM parameters.
    # Get cluster name from ssm parameter.
    cluster_name = ssm.StringParameter.from_string_parameter_name(
        self,
        "GetClusterName",
        string_parameter_name="/dev/compute/container/ecs-cluster-name"
    ).string_value
    vpc_az = ssm.StringListParameter.from_string_list_parameter_name(
        self,
        "GetVpcAz",
        string_list_parameter_name="/dev/network/vpc/vpc-az"
    ).string_list_value
    # Using two plain string parameters instead of a StringList because of a
    # subnets parsing issue.
    vpc_public_subnets_1 = ssm.StringParameter.from_string_parameter_name(
        self,
        "GetVpcPublicSubnets1",
        string_parameter_name="/dev/network/vpc/vpc-public-subnets-1"
    ).string_value
    vpc_public_subnets_2 = ssm.StringParameter.from_string_parameter_name(
        self,
        "GetVpcPublicSubnets2",
        string_parameter_name="/dev/network/vpc/vpc-public-subnets-2"
    ).string_value
    vpc_id = ssm.StringParameter.from_string_parameter_name(
        self, "GetVpcId",
        string_parameter_name="/dev/network/vpc/vpc-id").string_value
    # Rebuild a usable Vpc object from the attributes fetched above.
    ec2_vpc = ec2.Vpc.from_vpc_attributes(
        self,
        "GetVpc",
        availability_zones=vpc_az,
        vpc_id=vpc_id,
        public_subnet_ids=[vpc_public_subnets_1, vpc_public_subnets_2])
    # Get security group id from ssm parameter, then resolve it to an object.
    security_group_id = ssm.StringParameter.from_string_parameter_name(
        self,
        "GetSgId",
        string_parameter_name="/dev/network/vpc/security-group-id"
    ).string_value
    ec2_sgp = ec2.SecurityGroup.from_security_group_id(
        self, "GetSgp", security_group_id=security_group_id)
    # myDateTimeFunction: the Lambda under canary deployment. RETAIN keeps old
    # versions around so CodeDeploy can shift traffic back on rollback.
    my_datetime_lambda = _lambda.Function(
        self,
        "my-datetime",
        runtime=_lambda.Runtime.NODEJS_12_X,
        handler="myDateTimeFunction.handler",
        code=_lambda.Code.asset("./lambda"),
        current_version_options=_lambda.VersionOptions(
            removal_policy=core.RemovalPolicy.RETAIN,
            retry_attempts=1))
    my_datetime_lambda.add_to_role_policy(
        iam.PolicyStatement(effect=iam.Effect.ALLOW,
                            actions=["lambda:InvokeFunction"],
                            resources=["*"]))
    # beforeAllowTraffic hook: runs before CodeDeploy shifts traffic; it learns
    # the new version's ARN through the NewVersion environment variable.
    pre_traffic_lambda = _lambda.Function(
        self,
        "pre-traffic",
        runtime=_lambda.Runtime.NODEJS_12_X,
        handler="beforeAllowTraffic.handler",
        code=_lambda.Code.asset("./lambda"),
        environment=dict(
            NewVersion=my_datetime_lambda.current_version.function_arn))
    # The hook must report its result back to CodeDeploy and invoke the new
    # version for its smoke test.
    pre_traffic_lambda.add_to_role_policy(
        iam.PolicyStatement(
            effect=iam.Effect.ALLOW,
            actions=["codedeploy:PutLifecycleEventHookExecutionStatus"],
            resources=["*"]))
    pre_traffic_lambda.add_to_role_policy(
        iam.PolicyStatement(effect=iam.Effect.ALLOW,
                            actions=["lambda:InvokeFunction"],
                            resources=["*"]))
    # afterAllowTraffic hook: same permissions, runs after the traffic shift.
    post_traffic_lambda = _lambda.Function(
        self,
        "post-traffic",
        runtime=_lambda.Runtime.NODEJS_12_X,
        handler="afterAllowTraffic.handler",
        code=_lambda.Code.asset("./lambda"),
        environment=dict(
            NewVersion=my_datetime_lambda.current_version.function_arn))
    post_traffic_lambda.add_to_role_policy(
        iam.PolicyStatement(
            effect=iam.Effect.ALLOW,
            actions=["codedeploy:PutLifecycleEventHookExecutionStatus"],
            resources=["*"]))
    post_traffic_lambda.add_to_role_policy(
        iam.PolicyStatement(effect=iam.Effect.ALLOW,
                            actions=["lambda:InvokeFunction"],
                            resources=["*"]))
    # CloudWatch Events rule that exercises the canary every 10 minutes.
    rule = events.Rule(
        self,
        "CanaryRule",
        schedule=events.Schedule.expression("rate(10 minutes)"),
        targets=[
            events_targets.LambdaFunction(
                my_datetime_lambda.current_version)
        ],
    )
    # Alarm that drives automatic rollback for the deployment group below.
    # NOTE(review): the original comment said "lambda erros metrics", but the
    # metric wired in is metric_invocations(); with threshold=0 and the
    # default >= comparison this alarms on any activity — confirm whether
    # metric_errors() was intended.
    alarm = cloudwatch.Alarm(
        self,
        "CanaryAlarm",
        metric=my_datetime_lambda.current_version.metric_invocations(),
        threshold=0,
        evaluation_periods=2,
        datapoints_to_alarm=2,
        treat_missing_data=cloudwatch.TreatMissingData.IGNORE,
        period=core.Duration.minutes(5),
        alarm_name="CanaryAlarm")
    # Deployment group: publishes the "live" alias all at once and rolls back
    # if the alarm fires during the deployment.
    lambda_deployment_group = codedeploy.LambdaDeploymentGroup(
        self,
        "datetime-lambda-deployment",
        alias=my_datetime_lambda.current_version.add_alias("live"),
        deployment_config=codedeploy.LambdaDeploymentConfig.ALL_AT_ONCE,
        alarms=[alarm],
        auto_rollback=codedeploy.AutoRollbackConfig(
            deployment_in_alarm=True),
        pre_hook=pre_traffic_lambda,
        post_hook=post_traffic_lambda)
    # Pass vpc, sgp and ecs cluster name to look up the shared ECS cluster.
    ecs_cluster = ecs.Cluster.from_cluster_attributes(
        self,
        "GetEcsCluster",
        cluster_name=cluster_name,
        vpc=ec2_vpc,
        security_groups=[ec2_sgp])
    # Fargate service: one "web" container built from <repo>/container.
    task_definition = ecs.FargateTaskDefinition(
        self,
        "TaskDef",
        memory_limit_mib=512,
        cpu=256,
    )
    container = task_definition.add_container(
        "web",
        image=ecs.ContainerImage.from_asset(
            os.path.join(work_dir, "container")),
        # Build a custom health check for your application's specific needs
        # and add it here. Ex: ping check, database check, etc.
        health_check=ecs.HealthCheck(command=["CMD-SHELL", "echo"]),
        # environment=dict(name="latest")
    )
    port_mapping = ecs.PortMapping(container_port=8000,
                                   protocol=ecs.Protocol.TCP)
    container.add_port_mappings(port_mapping)
    # Create Fargate Service.
    # Current limitation: Blue/Green deployment
    # https://github.com/aws/aws-cdk/issues/1559
    service = ecs.FargateService(
        self,
        "Service",
        cluster=ecs_cluster,
        task_definition=task_definition,
        assign_public_ip=True,
        deployment_controller=ecs.DeploymentController(
            type=ecs.DeploymentControllerType.ECS),
        desired_count=2,
        min_healthy_percent=50)
    # Public Application Load Balancer with a single port-80 listener.
    lb = elbv2.ApplicationLoadBalancer(self,
                                       "LB",
                                       vpc=ec2_vpc,
                                       internet_facing=True)
    listener = lb.add_listener("Listener", port=80, open=True)
    # Default action routes to the Lambda...
    listener.add_targets(
        "Lambda", targets=[elb_targets.LambdaTarget(my_datetime_lambda)])
    # ...while /container is routed (priority 10) to the Fargate service.
    listener.add_targets("Fargate",
                         port=8000,
                         path_pattern="/container",
                         priority=10,
                         targets=[service])
    # Well-known attribute name read by the integration tests.
    self.load_balancer_dns_name = lb.load_balancer_dns_name
def __init__(
    self,
    scope: core.Construct,
    id: str,
    cloudmap_namespace="airflow.com",
    postgres_password="******",
    airflow_webserver_port=80,
    dags_folder="/src/dags",
    executor="CeleryExecutor",
    postgres_user="******",
    airflow_home="/airflow",
    aws_region="us-west-2",
    postgres_db="airflow",
    log_prefix="airflow",
    domain_name=None,
    hosted_zone=None,
    certificate=None,
    load_examples=True,
    web_container_desired_count=1,
    worker_container_desired_count=1,
    worker_cpu=2048,
    worker_memory_limit_mib=4096,
    vpc=None,
    bucket=None,
    log_driver=None,
    env=None,
    cluster=None,
    base_image=None,
    rds_instance=None,
    web_task=None,
    worker_task=None,
    scheduler_task=None,
    message_broker_task=None,
    message_broker_service=None,
    message_broker_service_name="rabbitmq",
    rabbitmq_alb=None,
    web_service=None,
    scheduler_service=None,
    worker_service=None,
    max_worker_count=16,
    worker_target_memory_utilization=80,
    worker_target_cpu_utilization=80,
    worker_memory_scale_in_cooldown=10,
    worker_memory_scale_out_cooldown=10,
    worker_cpu_scale_in_cooldown=10,
    worker_cpu_scale_out_cooldown=10,
    **kwargs,
) -> None:
    """Deploy a CeleryExecutor Airflow installation on ECS/Fargate.

    Builds (or accepts pre-configured instances of) every piece of the
    deployment: VPC, S3 log bucket, ECS cluster with a CloudMap namespace,
    Postgres RDS instance, RabbitMQ message broker behind an internal ALB,
    and the Airflow web / scheduler / worker Fargate services, with
    memory- and CPU-based autoscaling on the workers.

    Every infrastructure argument (``vpc``, ``bucket``, ``cluster``,
    ``*_task``, ``*_service``, ``rabbitmq_alb``, ...) defaults to ``None``
    and is created here if not supplied; the ``*_pre_configured`` flags below
    skip the extra wiring for pieces the caller supplied.
    """
    super().__init__(scope, id, **kwargs)
    vpc = vpc or aws_ec2.Vpc(self, "airflow-vpc")
    # CloudMap namespace for service discovery (workers reach the broker by
    # "<service-name>.<namespace>").
    cloudmap_namespace_options = aws_ecs.CloudMapNamespaceOptions(
        name=cloudmap_namespace, vpc=vpc)
    bucket = bucket or aws_s3.Bucket(
        self,
        "airflow-bucket",
        removal_policy=core.RemovalPolicy.DESTROY,
    )
    core.CfnOutput(
        self,
        "s3-log-bucket",
        value=
        f"https://s3.console.aws.amazon.com/s3/buckets/{bucket.bucket_name}",
        description="where worker logs are written to",
    )
    log_driver = log_driver or aws_ecs.LogDriver.aws_logs(
        stream_prefix=log_prefix)
    # Shared container environment; non-string values are normalised below.
    environment = {
        "AIRFLOW__WEBSERVER__WEB_SERVER_PORT": airflow_webserver_port,
        # "AIRFLOW__CORE__HOSTNAME_CALLABLE": "socket:gethostname",
        "AIRFLOW__CORE__LOAD_EXAMPLES": load_examples,
        "AIRFLOW__CORE__DAGS_FOLDER": dags_folder,
        "AIRFLOW__CORE__EXECUTOR": executor,
        # "AIRFLOW__CORE__REMOTE_BASE_LOG_FOLDER": f"s3://{bucket.bucket_name}/airflow/logs",
        "AIRFLOW__CORE__REMOTE_LOG_CONN_ID": "aws_default",
        "AIRFLOW__CORE__REMOTE_LOGGING": "true",
        "AIRFLOW__CORE__ENCRYPT_S3_LOGS": "false",
        # "GUNICORN_CMD_ARGS": "--log-level WARNING",
        "C_FORCE_ROOT": "true",
        "INVOKE_RUN_ECHO": 1,
        #
        "POSTGRES_PASSWORD": postgres_password,
        "POSTGRES_USER": postgres_user,
        "POSTGRES_DB": postgres_db,
        #
        "AWS_DEFAULT_REGION": aws_region,
        "AIRFLOW_HOME": airflow_home,
        #
        "AIRFLOW_VAR_EXAMPLE_S3_CONN": "example_s3_conn",
        "AIRFLOW_VAR_DEFAULT_S3_BUCKET": bucket.bucket_name,
        # commenting out this part, because altering the user and
        # password will affect the way workers authenticate with
        # rabbitmq
        # "RABBITMQ_DEFAULT_USER": ...,
        # "RABBITMQ_DEFAULT_PASS": ...,
    }
    # Caller-supplied overrides win; then coerce everything to str as
    # required by the ECS container environment.
    environment.update(env or {})
    environment = {k: str(v) for k, v in environment.items()}
    cluster = cluster or aws_ecs.Cluster(
        self,
        "cluster",
        vpc=vpc,
        default_cloud_map_namespace=cloudmap_namespace_options,
    )
    base_image = base_image or aws_ecs.ContainerImage.from_registry(
        "knowsuchagency/airflow-cdk")
    rds_instance = rds_instance or aws_rds.DatabaseInstance(
        self,
        "airflow-rds-instance",
        master_username=postgres_user,
        engine=aws_rds.DatabaseInstanceEngine.POSTGRES,
        allocated_storage=10,
        database_name=postgres_db,
        master_user_password=core.SecretValue.plain_text(
            postgres_password),
        vpc=vpc,
        instance_type=aws_ec2.InstanceType("t3.micro"),
        # TODO: turn this on when ready for prod
        deletion_protection=False,
        delete_automated_backups=True,
        removal_policy=core.RemovalPolicy.DESTROY,
    )
    web_task = web_task or aws_ecs.FargateTaskDefinition(
        self,
        "web-task",
        cpu=1024,
        memory_limit_mib=2048,
    )
    worker_task = worker_task or aws_ecs.FargateTaskDefinition(
        self,
        "worker-task",
        cpu=worker_cpu,
        memory_limit_mib=worker_memory_limit_mib,
    )
    scheduler_task = scheduler_task or aws_ecs.FargateTaskDefinition(
        self,
        "scheduler-task",
        cpu=1024,
        memory_limit_mib=2048,
    )
    message_broker_task_pre_configured = message_broker_task is not None
    message_broker_task = (message_broker_task or
                           aws_ecs.FargateTaskDefinition(
                               self,
                               "message-broker-task",
                               cpu=1024,
                               memory_limit_mib=2048,
                           ))
    # Only add the RabbitMQ container when we created the task ourselves.
    # NOTE(review): if message_broker_task IS pre-configured,
    # rabbitmq_container is never bound, yet it is referenced further down in
    # the rabbitmq_alb wiring — confirm that callers passing a custom task
    # also pass a pre-configured rabbitmq_alb.
    if not message_broker_task_pre_configured:
        rabbitmq_container = message_broker_task.add_container(
            "rabbitmq_container",
            image=aws_ecs.ContainerImage.from_registry(
                "rabbitmq:management"),
            environment=environment,
            logging=log_driver,
            health_check=aws_ecs.HealthCheck(
                command=["CMD", "rabbitmqctl", "status"]),
        )
        # 5672: AMQP broker port; 15672: management UI/API.
        rabbitmq_container.add_port_mappings(
            aws_ecs.PortMapping(container_port=5672))
        rabbitmq_container.add_port_mappings(
            aws_ecs.PortMapping(container_port=15672))
    message_broker_service_pre_configured = (message_broker_service
                                             is not None)
    message_broker_service = (message_broker_service or
                              aws_ecs.FargateService(
                                  self,
                                  "message_broker_service",
                                  task_definition=message_broker_task,
                                  cluster=cluster,
                              ))
    if not message_broker_service_pre_configured:
        message_broker_service.enable_cloud_map(
            name=message_broker_service_name)
    # DNS name the workers use to reach the broker via CloudMap.
    message_broker_hostname = (
        f"{message_broker_service_name}.{cloudmap_namespace}")
    # Web and worker read/write remote logs; only the worker deletes.
    for task in web_task, worker_task:
        bucket.grant_read_write(task.task_role.grant_principal)
    bucket.grant_delete(worker_task.task_role.grant_principal)
    postgres_hostname = rds_instance.db_instance_endpoint_address
    # Now that the RDS endpoint is known, complete the Airflow connection
    # settings (metadata DB, Celery result backend, broker URL).
    environment.update(
        AIRFLOW__CORE__SQL_ALCHEMY_CONN=
        f"postgresql+psycopg2://{postgres_user}"
        f":{postgres_password}@{postgres_hostname}"
        f":5432/{postgres_db}",
        AIRFLOW__CELERY__RESULT_BACKEND=f"db+postgresql://{postgres_user}"
        f":{postgres_password}@{postgres_hostname}"
        f":5432/{postgres_db}",
        AIRFLOW__CELERY__BROKER_URL=f"amqp://{message_broker_hostname}",
    )
    web_container = web_task.add_container(
        "web-container",
        image=base_image,
        environment=environment,
        logging=log_driver,
    )
    web_container.add_port_mappings(
        aws_ecs.PortMapping(container_port=airflow_webserver_port))
    scheduler_container = scheduler_task.add_container(
        "scheduler-container",
        image=base_image,
        environment=environment,
        logging=log_driver,
        command=["scheduler"],
    )
    worker_container = worker_task.add_container(
        "worker-container",
        image=base_image,
        environment=environment,
        logging=log_driver,
        command=["worker"],
    )
    web_service_pre_configured = web_service is not None
    # NOTE(review): when domain_name/hosted_zone/certificate are left at their
    # None defaults, a PublicHostedZone with zone_name=None is created here —
    # confirm callers always provide domain_name (or a full web_service).
    hosted_zone = hosted_zone or aws_route53.PublicHostedZone(
        self,
        "hosted-zone",
        zone_name=domain_name,
        comment="rendered from cdk",
    )
    certificate = (certificate or
                   certificate_manager.DnsValidatedCertificate(
                       self,
                       "tls-cert",
                       hosted_zone=hosted_zone,
                       domain_name=domain_name,
                   ))
    protocol = elb.ApplicationProtocol.HTTPS
    web_service = (web_service or
                   aws_ecs_patterns.ApplicationLoadBalancedFargateService(
                       self,
                       "web-service",
                       task_definition=web_task,
                       cluster=cluster,
                       desired_count=web_container_desired_count,
                       protocol=protocol,
                       domain_zone=hosted_zone,
                       domain_name=domain_name,
                       certificate=certificate,
                   ))
    if not web_service_pre_configured:
        # Airflow redirects freely; accept any non-5xx as healthy.
        web_service.target_group.configure_health_check(
            healthy_http_codes="200-399")
    scheduler_service = scheduler_service or aws_ecs.FargateService(
        self,
        "scheduler-service",
        task_definition=scheduler_task,
        cluster=cluster,
    )
    worker_service_pre_configured = worker_service is not None
    worker_service = worker_service or aws_ecs.FargateService(
        self,
        "worker-service",
        task_definition=worker_task,
        cluster=cluster,
        desired_count=worker_container_desired_count,
    )
    # Autoscale the workers on memory AND cpu, capped at max_worker_count.
    if not worker_service_pre_configured:
        scalable_task_count = worker_service.auto_scale_task_count(
            max_capacity=max_worker_count)
        scalable_task_count.scale_on_memory_utilization(
            "memory-utilization-worker-scaler",
            policy_name="memory-utilization-worker-scaler",
            target_utilization_percent=worker_target_memory_utilization,
            scale_in_cooldown=core.Duration.seconds(
                worker_memory_scale_in_cooldown),
            scale_out_cooldown=core.Duration.seconds(
                worker_memory_scale_out_cooldown),
        )
        scalable_task_count.scale_on_cpu_utilization(
            "cpu-utilization-worker-scaler",
            policy_name="cpu-utilization-worker-scaler",
            target_utilization_percent=worker_target_cpu_utilization,
            scale_in_cooldown=core.Duration.seconds(
                worker_cpu_scale_in_cooldown),
            scale_out_cooldown=core.Duration.seconds(
                worker_cpu_scale_out_cooldown),
        )
    # Every Airflow service needs Postgres plus both RabbitMQ ports.
    for service in (
            web_service.service,
            scheduler_service,
            worker_service,
    ):
        service.connections.allow_to(
            rds_instance,
            aws_ec2.Port.tcp(5432),
            description="allow connection to RDS",
        )
        service.connections.allow_to(
            message_broker_service.connections,
            aws_ec2.Port.tcp(5672),
            description="allow connection to rabbitmq broker",
        )
        service.connections.allow_to(
            message_broker_service.connections,
            aws_ec2.Port.tcp(15672),
            description="allow connection to rabbitmq management api",
        )
    rabbitmq_alb_pre_configured = rabbitmq_alb is not None
    rabbitmq_alb = rabbitmq_alb or elb.ApplicationLoadBalancer(
        self,
        "rabbitmq-alb",
        vpc=vpc,
        internet_facing=True,
    )
    # Expose the RabbitMQ management UI through the ALB on port 80.
    if not rabbitmq_alb_pre_configured:
        core.CfnOutput(
            self,
            id="rabbitmqManagement",
            value=f"http://{rabbitmq_alb.load_balancer_dns_name}",
        )
        rabbitmq_listener = rabbitmq_alb.add_listener("rabbitmq-listener",
                                                      port=80)
        # rabbitmq_listener.add_targets(
        #     message_broker_service.load_balancer_target(
        #         container_name=rabbitmq_container.container_name,
        #         # TODO: cdk bug? jsii.errors.JSIIError: Expected a string, got {"$jsii.byref":"Object@10056"}
        #         container_port=15672,
        #     )
        # )
        message_broker_service.register_load_balancer_targets(
            aws_ecs.EcsTarget(
                container_name=rabbitmq_container.container_name,
                container_port=15672,
                new_target_group_id="rabbitmq-management-tg",
                listener=aws_ecs.ListenerConfig.application_listener(
                    rabbitmq_listener,
                ),
            ))
        rabbitmq_alb.connections.allow_to(
            message_broker_service.connections,
            aws_ec2.Port.tcp(15672),
            description="allow connection to rabbitmq management api",
        )
def __init__(
    self,
    scope: core.Construct,
    id: str,
    cloudmap_namespace="airflow.com",
    log_prefix="airflow",
    vpc=None,
    bucket=None,
    log_driver=None,
    base_image=None,
    rds_instance=None,
    message_broker_service=None,
    message_broker_service_name="rabbitmq",
    max_worker_count=16,
    worker_target_memory_utilization=80,
    worker_target_cpu_utilization=80,
    worker_memory_scale_in_cooldown=10,
    worker_memory_scale_out_cooldown=10,
    worker_cpu_scale_in_cooldown=10,
    worker_cpu_scale_out_cooldown=10,
    # FIX: these three were referenced throughout the body (RDS credentials
    # and connection URLs) but never defined anywhere -> NameError at synth
    # time. Added as backward-compatible keyword parameters with the same
    # defaults as the sibling AirflowStack __init__.
    postgres_user="******",
    postgres_password="******",
    postgres_db="airflow",
    **kwargs,
) -> None:
    """Deploy a CeleryExecutor Airflow installation via ContainerService helpers.

    Currently the code only supports a single stack; the plan is to
    modularize it further by allowing individual stacks to be deployed.

    Creates (or reuses via the optional arguments): a VPC, S3 log bucket,
    ECS cluster with CloudMap namespace, Postgres RDS instance, a RabbitMQ
    broker fronted by an ALB, and web / worker / scheduler Fargate services
    with worker autoscaling.
    """
    super().__init__(scope, id, **kwargs)
    airflow_stack = AirflowStack(self,
                                 "airflow-stack",
                                 env=core.Environment(account="973069700476",
                                                      region="us-west-2"))
    vpc = vpc or ec2.Vpc.from_lookup(
        airflow_stack, "VPC", vpc_name="vpc-devprivcdp-dev-private")
    # Create a namespace in ECS with the above VPC; services register here
    # so workers can reach the broker as "<service-name>.<namespace>".
    cloudmap_namespace_options = ecs.CloudMapNamespaceOptions(
        name=cloudmap_namespace, vpc=vpc)
    # S3 bucket receiving remote worker logs.
    bucket = bucket or s3.Bucket(
        airflow_stack,
        "airflow-bucket",
        bucket_name="airflow-logs",
        removal_policy=core.RemovalPolicy.DESTROY,
    )
    core.CfnOutput(
        airflow_stack,
        "s3-log-bucket",
        value=
        f"https://s3.console.aws.amazon.com/s3/buckets/{bucket.bucket_name}",
        description="where worker logs are written to",
    )
    cluster = ecs.Cluster(
        airflow_stack,
        "cluster",
        vpc=vpc,
        default_cloud_map_namespace=cloudmap_namespace_options,
    )
    # Airflow image pulled straight from Docker Hub.
    base_image = base_image or ecs.ContainerImage.from_registry(
        "knowsuchagency/airflow-cdk")
    # Postgres metadata database.
    rds_instance = rds_instance or rds.DatabaseInstance(
        airflow_stack,
        "airflow-rds-instance",
        master_username=postgres_user,
        engine=rds.DatabaseInstanceEngine.POSTGRES,
        allocated_storage=10,
        database_name=postgres_db,
        master_user_password=core.SecretValue.plain_text(postgres_password),
        vpc=vpc,
        instance_type=ec2.InstanceType("t3.micro"),
        deletion_protection=False,  # Required in Prod.
        delete_automated_backups=True,
        removal_policy=core.RemovalPolicy.DESTROY,
    )
    # ALB exposing the RabbitMQ management UI on port 80.
    rabbitmq_alb = elb.ApplicationLoadBalancer(airflow_stack,
                                               "rabbitmq-alb",
                                               vpc=vpc,
                                               internet_facing=True)
    core.CfnOutput(
        airflow_stack,
        id="rabbitmqManagement",
        value=f"http://{rabbitmq_alb.load_balancer_dns_name}",
    )
    rabbitmq_listener = rabbitmq_alb.add_listener("rabbitmq-listener",
                                                  port=80)
    # Log driver sending container logs to CloudWatch Logs.
    log_driver = log_driver or ecs.LogDriver.aws_logs(
        stream_prefix=log_prefix)
    # Environment variables shared by all airflow containers.
    env = {}
    postgres_hostname = rds_instance.db_instance_endpoint_address
    # FIX: the suffix used to start with "://", producing malformed URLs like
    # "postgresql+psycopg2://://user:..." once interpolated after another
    # scheme separator. It now carries only credentials/host/db, matching the
    # URL shape used by the sibling AirflowStack.
    postgres_connection_suffix = (
        f"{postgres_user}:{postgres_password}"
        f"@{postgres_hostname}:5432/{postgres_db}")
    connection_url = f"postgresql+psycopg2://{postgres_connection_suffix}"
    backend = f"db+postgresql://{postgres_connection_suffix}"
    message_broker_hostname = (
        f"{message_broker_service_name}.{cloudmap_namespace}")
    broker_url = f"amqp://{message_broker_hostname}"
    env["AIRFLOW__CORE__SQL_ALCHEMY_CONN"] = connection_url
    env["AIRFLOW__CELERY__RESULT_BACKEND"] = backend
    env["AIRFLOW__CELERY__BROKER_URL"] = broker_url
    # ECS requires string-valued environment variables.
    env = {k: str(v) for k, v in env.items()}
    # --- Web service -----------------------------------------------------
    web_container_service = ContainerService(airflow_stack)
    web_container_service.add_task_definition("web-task",
                                              cpu=1024,
                                              memory_limit_mib=2048)
    # Web needs read/write on the log bucket.
    bucket.grant_read_write(
        web_container_service.task_definition.task_role.grant_principal)
    web_container_service.add_container("web-container", base_image, env,
                                        log_driver)
    web_container_service.fargate_service("web-service",
                                          cluster,
                                          is_alb=True)
    # Airflow redirects freely; accept any non-5xx as healthy.
    web_container_service.service.target_group.configure_health_check(
        healthy_http_codes="200-399")
    # --- Worker service --------------------------------------------------
    worker_container_service = ContainerService(airflow_stack)
    worker_container_service.add_task_definition("worker-task", 1024, 2048)
    # Workers read/write AND delete remote logs.
    bucket.grant_read_write(
        worker_container_service.task_definition.task_role.grant_principal)
    bucket.grant_delete(
        worker_container_service.task_definition.task_role.grant_principal)
    worker_container_service.add_container("worker-container",
                                           base_image,
                                           environment=env,
                                           logging=log_driver,
                                           command=["worker"])
    worker_container_service.fargate_service(
        "worker-service", cluster, desired_count=max_worker_count)
    # Memory-based autoscaling for workers.
    worker_container_service.set_memory_utilization(
        max_worker_count,
        "auto-scale-worker-memory",
        policy_name="auto-scale-worker-memory",
        target_utilization_percent=worker_target_memory_utilization,
        scale_in_cooldown=worker_memory_scale_in_cooldown,
        scale_out_cooldown=worker_memory_scale_out_cooldown)
    # CPU-based autoscaling for workers.
    worker_container_service.set_cpu_utilization(
        max_worker_count,
        "auto-scale-worker-cpu",
        policy_name="auto-scale-worker-cpu",
        target_utilization_percent=worker_target_cpu_utilization,
        scale_in_cooldown=worker_cpu_scale_in_cooldown,
        scale_out_cooldown=worker_cpu_scale_out_cooldown)
    # --- Scheduler service -----------------------------------------------
    scheduler_container_service = ContainerService(airflow_stack)
    # FIX: the task definition id was "worker-task", colliding with the
    # worker's construct id in the same stack scope.
    scheduler_container_service.add_task_definition("scheduler-task", 1024,
                                                    2048)
    scheduler_container_service.add_container("scheduler-container",
                                              base_image,
                                              environment=env,
                                              logging=log_driver,
                                              command=["scheduler"])
    scheduler_container_service.fargate_service("scheduler-service", cluster)
    # --- RabbitMQ message broker ----------------------------------------
    message_broker_container_service = ContainerService(airflow_stack)
    message_broker_container_service.add_task_definition(
        "message-broker-task", 1024, 2048)
    message_broker_container_service.add_container(
        "rabbitmq_container",
        ecs.ContainerImage.from_registry("rabbitmq:management"),
        environment=env,
        logging=log_driver,
        health_check=ecs.HealthCheck(
            command=["CMD", "rabbitmqctl", "status"]))
    # 5672: AMQP broker port; 15672: management UI/API.
    message_broker_container_service.add_container_port(5672)
    message_broker_container_service.add_container_port(15672)
    message_broker_container_service.fargate_service(
        "message_broker_service", cluster)
    # Enable CloudMap so the AIRFLOW__CELERY__BROKER_URL hostname resolves.
    message_broker_container_service.enable_service_dicovery(
        message_broker_service_name)
    # Route the ALB listener to the management port.
    message_broker_container_service.register_lb_targets(
        ecs.ListenerConfig.application_listener(rabbitmq_listener),
        "rabbitmq-management-tg",
        message_broker_container_service.container.container_name, 15672)
    rabbitmq_alb.connections.allow_to(
        message_broker_container_service.service.connections,
        ec2.Port.tcp(15672),
        description="allow connection to rabbit-mq management api")
    # FIX: the loop below used `message_broker_service.connections`, but
    # message_broker_service defaults to None and is never assigned in this
    # method -> AttributeError. Fall back to the broker service built above.
    broker_connections = (message_broker_service.connections
                          if message_broker_service is not None else
                          message_broker_container_service.service.connections)
    # Open ports for web, scheduler and worker services.
    container_services = [
        web_container_service, scheduler_container_service,
        worker_container_service
    ]
    for container_service in container_services:
        container_service.service_allow_connection(
            rds_instance,
            ec2.Port.tcp(5432),
            description="allow connection to RDS")
        container_service.service_allow_connection(
            broker_connections,
            ec2.Port.tcp(5672),
            description="allow connection to rabbit-mq broker")
        container_service.service_allow_connection(
            broker_connections,
            ec2.Port.tcp(15672),
            description="allow connection to rabbit-mq management api")