Example 1
    def __init__(self, scope: core.Construct, id: str, vpc_cidr: str,
                 **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        self._vpc = ec2.Vpc(
            self,
            id,
            cidr=vpc_cidr,
            enable_dns_hostnames=True,
            enable_dns_support=True,
            max_azs=2,
            subnet_configuration=[
                ec2.SubnetConfiguration(subnet_type=ec2.SubnetType.PUBLIC,
                                        name="BASTION",
                                        cidr_mask=24),
                ec2.SubnetConfiguration(subnet_type=ec2.SubnetType.PRIVATE,
                                        name="ECS",
                                        cidr_mask=24),
                ec2.SubnetConfiguration(subnet_type=ec2.SubnetType.PUBLIC,
                                        name="DBS",
                                        cidr_mask=24)
            ],
            nat_gateway_provider=ec2.NatProvider.gateway(),
            nat_gateway_subnets=ec2.SubnetSelection(
                one_per_az=True, subnet_group_name="BASTION"),
            gateway_endpoints={
                's3':
                ec2.GatewayVpcEndpointOptions(
                    service=ec2.GatewayVpcEndpointAwsService.S3,
                    subnets=[
                        ec2.SubnetSelection(one_per_az=True,
                                            subnet_type=ec2.SubnetType.PUBLIC)
                    ])
            })
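For context, a minimal sketch of the module scaffolding this fragment assumes (CDK v1-style imports; the class name, app wiring and sample CIDR below are illustrative assumptions, not part of the original snippet):

# Hypothetical scaffolding around the fragment above (CDK v1 API).
# "VpcStack" and the sample CIDR are assumptions for illustration only.
from aws_cdk import core
import aws_cdk.aws_ec2 as ec2

class VpcStack(core.Stack):
    def __init__(self, scope: core.Construct, id: str, vpc_cidr: str,
                 **kwargs) -> None:
        super().__init__(scope, id, **kwargs)
        # ... VPC definition from the example above goes here ...

app = core.App()
VpcStack(app, "VpcStack", vpc_cidr="10.0.0.0/16")
app.synth()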
Example 2
    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        # The code that defines your stack goes here
        vpc = aws_ec2.Vpc(
            self,
            "DynamodbVPC",
            max_azs=2,
            gateway_endpoints={
                "DynamoDB":
                aws_ec2.GatewayVpcEndpointOptions(
                    service=aws_ec2.GatewayVpcEndpointAwsService.DYNAMODB)
            })

        #XXX: Another way to add DynamoDB VPC Endpoint
        #dynamo_db_endpoint = vpc.add_gateway_endpoint("DynamoDB",
        #  service=aws_ec2.GatewayVpcEndpointAwsService.DYNAMODB
        #)

        ddb_table = aws_dynamodb.Table(
            self,
            "SimpleDynamoDbTable",
            table_name="SimpleTable",
            # removal_policy=cdk.RemovalPolicy.DESTROY,
            partition_key=aws_dynamodb.Attribute(
                name="pkid", type=aws_dynamodb.AttributeType.STRING),
            sort_key=aws_dynamodb.Attribute(
                name="sortkey", type=aws_dynamodb.AttributeType.NUMBER),
            time_to_live_attribute="ttl",
            billing_mode=aws_dynamodb.BillingMode.PROVISIONED,
            read_capacity=15,
            write_capacity=5,
        )
Example 3
    def __init__(self, scope: core.Construct, id: str, props,
                 **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # The code that defines your stack goes here
        self.vpc = ec2.Vpc(
            self,
            "VPC",
            max_azs=3,
            cidr="10.10.0.0/16",
            # this configuration creates 2 subnet groups across 3 AZs = 6 subnets.
            subnet_configuration=[
                ec2.SubnetConfiguration(subnet_type=ec2.SubnetType.PUBLIC,
                                        name="PublicSubnet",
                                        cidr_mask=24),
                ec2.SubnetConfiguration(subnet_type=ec2.SubnetType.PRIVATE,
                                        name="PrivateSubnet",
                                        cidr_mask=24)
            ],
            # nat_gateway_provider=ec2.NatProvider.gateway(),
            nat_gateways=2,
            gateway_endpoints={
                "S3":
                ec2.GatewayVpcEndpointOptions(
                    service=ec2.GatewayVpcEndpointAwsService.S3)
            })
        self.vpc.add_flow_log("FlowLogS3",
                              destination=ec2.FlowLogDestination.to_s3(),
                              traffic_type=ec2.FlowLogTrafficType.REJECT)

        props["vpc"] = self.vpc
Example 4
    def __init__(self, scope: core.Construct, construct_id: str,
                 **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        self.vpc = ec2.Vpc(
            self,
            "VPC",
            nat_gateways=1,
            subnet_configuration=[
                ec2.SubnetConfiguration(name="public",
                                        subnet_type=ec2.SubnetType.PUBLIC,
                                        cidr_mask=24),
                ec2.SubnetConfiguration(name="private",
                                        subnet_type=ec2.SubnetType.PRIVATE,
                                        cidr_mask=24),
                ec2.SubnetConfiguration(name="isolated",
                                        subnet_type=ec2.SubnetType.ISOLATED,
                                        cidr_mask=24)
            ],
            gateway_endpoints={
                "S3":
                ec2.GatewayVpcEndpointOptions(
                    service=ec2.GatewayVpcEndpointAwsService.S3)
            },
            flow_logs={"FlowLogs": ec2.FlowLogOptions()})
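As a usage note (not in the original snippet), the subnet groups declared above can later be selected by name or type when placing workloads; a small sketch assuming the same `ec2` alias and the `self.vpc` attribute from this example:

        # Sketch only: selecting subnets from the VPC defined above.
        # Group names ("isolated", "private") match the SubnetConfiguration names.
        isolated = self.vpc.select_subnets(subnet_group_name="isolated")
        private_selection = ec2.SubnetSelection(subnet_type=ec2.SubnetType.PRIVATE)
        # isolated.subnet_ids can be passed to constructs that expect raw subnet IDs.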
Example 5
    def provision_vpc(self, name: str, vpc: VPC):
        self.public_subnet_name = f"{name}-public"
        self.private_subnet_name = f"{name}-private"
        if not vpc.create:
            self.vpc = ec2.Vpc.from_lookup(self.scope, "Vpc", vpc_id=vpc.id)
            return

        nat_provider = ec2.NatProvider.gateway()
        self.vpc = ec2.Vpc(
            self.scope,
            "VPC",
            max_azs=vpc.max_azs,
            cidr=vpc.cidr,
            subnet_configuration=[
                ec2.SubnetConfiguration(
                    subnet_type=ec2.SubnetType.PUBLIC,
                    name=self.public_subnet_name,
                    cidr_mask=24,  # can't use token ids
                ),
                ec2.SubnetConfiguration(
                    subnet_type=ec2.SubnetType.PRIVATE,
                    name=self.private_subnet_name,
                    cidr_mask=24,  # can't use token ids
                ),
            ],
            gateway_endpoints={
                "S3":
                ec2.GatewayVpcEndpointOptions(
                    service=ec2.GatewayVpcEndpointAwsService.S3),
            },
            nat_gateway_provider=nat_provider,
        )
        cdk.Tags.of(self.vpc).add("Name", name)
        cdk.CfnOutput(self.scope, "vpc-output", value=self.vpc.vpc_cidr_block)

        # ripped off this: https://github.com/aws/aws-cdk/issues/9573
        pod_cidr = ec2.CfnVPCCidrBlock(self.scope,
                                       "PodCidr",
                                       vpc_id=self.vpc.vpc_id,
                                       cidr_block="100.64.0.0/16")
        c = 0
        for az in self.vpc.availability_zones:
            pod_subnet = ec2.PrivateSubnet(
                self.scope,
                # this can't be okay
                f"{name}-pod-{c}",  # Can't use parameter/token in this name
                vpc_id=self.vpc.vpc_id,
                availability_zone=az,
                cidr_block=f"100.64.{c}.0/18",
            )

            pod_subnet.add_default_nat_route([
                gw for gw in nat_provider.configured_gateways if gw.az == az
            ][0].gateway_id)
            pod_subnet.node.add_dependency(pod_cidr)
            # TODO: need to tag

            c += 64

        for endpoint in [
                "ec2",  # Only these first three have predefined consts
                "sts",
                "ecr.api",
                "autoscaling",
                "ecr.dkr",
        ]:  # TODO: Do we need an s3 interface as well? or just the gateway?
            self.vpc_endpoint = ec2.InterfaceVpcEndpoint(
                self.scope,
                f"{endpoint}-ENDPOINT",
                vpc=self.vpc,
                service=ec2.InterfaceVpcEndpointAwsService(endpoint, port=443),
                # private_dns_enabled=True,
                subnets=ec2.SubnetSelection(
                    subnet_type=ec2.SubnetType.PRIVATE),
            )
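The `vpc` argument above is only read through four attributes (create, id, max_azs, cidr); a hypothetical definition and call, inferred from those reads and not shown in the original:

# Hypothetical config object for provision_vpc; field names are inferred from
# the attributes the method reads, and the defaults are illustrative assumptions.
from dataclasses import dataclass
from typing import Optional

@dataclass
class VPC:
    create: bool = True
    id: Optional[str] = None
    max_azs: int = 2
    cidr: str = "10.0.0.0/16"

# e.g. provisioner.provision_vpc("demo", VPC(create=True, cidr="10.20.0.0/16"))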
Example 6
  def __init__(self, scope: Construct, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    # The code that defines your stack goes here
    vpc = aws_ec2.Vpc(self, "NeptuneHolVPC",
      max_azs=2,
      gateway_endpoints={
        "S3": aws_ec2.GatewayVpcEndpointOptions(
          service=aws_ec2.GatewayVpcEndpointAwsService.S3
        )
      }
    )

    sg_use_graph_db = aws_ec2.SecurityGroup(self, "NeptuneClientSG",
      vpc=vpc,
      allow_all_outbound=True,
      description='security group for neptune client',
      security_group_name='use-neptune-client'
    )
    cdk.Tags.of(sg_use_graph_db).add('Name', 'use-neptune-client')

    sg_graph_db = aws_ec2.SecurityGroup(self, "NeptuneSG",
      vpc=vpc,
      allow_all_outbound=True,
      description='security group for neptune',
      security_group_name='neptune-server'
    )
    cdk.Tags.of(sg_graph_db).add('Name', 'neptune-server')

    sg_graph_db.add_ingress_rule(peer=sg_graph_db, connection=aws_ec2.Port.tcp(8182), description='neptune-server')
    sg_graph_db.add_ingress_rule(peer=sg_use_graph_db, connection=aws_ec2.Port.tcp(8182), description='use-neptune-client')

    graph_db_subnet_group = aws_neptune.CfnDBSubnetGroup(self, 'NeptuneHolSubnetGroup',
      db_subnet_group_description='subnet group for neptune hol',
      subnet_ids=vpc.select_subnets(subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT).subnet_ids,
      db_subnet_group_name='neptune-hol'
    )

    graph_db = aws_neptune.CfnDBCluster(self, 'NeptuneHol',
      availability_zones=vpc.availability_zones,
      db_subnet_group_name=graph_db_subnet_group.db_subnet_group_name,
      db_cluster_identifier='neptune-hol',
      backup_retention_period=1,
      preferred_backup_window='08:45-09:15',
      preferred_maintenance_window='sun:18:00-sun:18:30',
      vpc_security_group_ids=[sg_graph_db.security_group_id]
    )
    graph_db.add_depends_on(graph_db_subnet_group)

    graph_db_instance = aws_neptune.CfnDBInstance(self, 'NeptuneHolInstance',
      db_instance_class='db.r5.large',
      allow_major_version_upgrade=False,
      auto_minor_version_upgrade=False,
      availability_zone=vpc.availability_zones[0],
      db_cluster_identifier=graph_db.db_cluster_identifier,
      db_instance_identifier='neptune-hol',
      preferred_maintenance_window='sun:18:00-sun:18:30'
    )
    graph_db_instance.add_depends_on(graph_db)

    graph_db_replica_instance = aws_neptune.CfnDBInstance(self, 'NeptuneHolReplicaInstance',
      db_instance_class='db.r5.large',
      allow_major_version_upgrade=False,
      auto_minor_version_upgrade=False,
      availability_zone=vpc.availability_zones[-1],
      db_cluster_identifier=graph_db.db_cluster_identifier,
      db_instance_identifier='neptune-hol-replica',
      preferred_maintenance_window='sun:18:00-sun:18:30'
    )
    graph_db_replica_instance.add_depends_on(graph_db)
    graph_db_replica_instance.add_depends_on(graph_db_instance)

    sagemaker_notebook_role_policy_doc = aws_iam.PolicyDocument()
    sagemaker_notebook_role_policy_doc.add_statements(aws_iam.PolicyStatement(**{
      "effect": aws_iam.Effect.ALLOW,
      "resources": ["arn:aws:s3:::aws-neptune-notebook",
        "arn:aws:s3:::aws-neptune-notebook/*"],
      "actions": ["s3:GetObject",
        "s3:ListBucket"]
    }))

    sagemaker_notebook_role_policy_doc.add_statements(aws_iam.PolicyStatement(**{
      "effect": aws_iam.Effect.ALLOW,
      "resources": ["arn:aws:neptune-db:{region}:{account}:{cluster_id}/*".format(
        region=cdk.Aws.REGION, account=cdk.Aws.ACCOUNT_ID, cluster_id=graph_db.attr_cluster_resource_id)],
      "actions": ["neptune-db:connect"]
    }))

    sagemaker_notebook_role = aws_iam.Role(self, 'SageMakerNotebookForNeptuneWorkbenchRole',
      role_name='AWSNeptuneNotebookRole-NeptuneHol',
      assumed_by=aws_iam.ServicePrincipal('sagemaker.amazonaws.com'),
      #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
      inline_policies={
        'AWSNeptuneNotebook': sagemaker_notebook_role_policy_doc
      }
    )

    neptune_wb_lifecycle_content = '''#!/bin/bash
sudo -u ec2-user -i <<'EOF'

echo "export GRAPH_NOTEBOOK_AUTH_MODE=DEFAULT" >> ~/.bashrc
echo "export GRAPH_NOTEBOOK_HOST={NeptuneClusterEndpoint}" >> ~/.bashrc
echo "export GRAPH_NOTEBOOK_PORT={NeptuneClusterPort}" >> ~/.bashrc
echo "export NEPTUNE_LOAD_FROM_S3_ROLE_ARN=''" >> ~/.bashrc
echo "export AWS_REGION={AWS_Region}" >> ~/.bashrc

aws s3 cp s3://aws-neptune-notebook/graph_notebook.tar.gz /tmp/graph_notebook.tar.gz
rm -rf /tmp/graph_notebook
tar -zxvf /tmp/graph_notebook.tar.gz -C /tmp
/tmp/graph_notebook/install.sh
EOF
'''.format(NeptuneClusterEndpoint=graph_db.attr_endpoint,
    NeptuneClusterPort=graph_db.attr_port,
    AWS_Region=cdk.Aws.REGION)

    neptune_wb_lifecycle_config_prop = aws_sagemaker.CfnNotebookInstanceLifecycleConfig.NotebookInstanceLifecycleHookProperty(
      content=cdk.Fn.base64(neptune_wb_lifecycle_content)
    )

    neptune_wb_lifecycle_config = aws_sagemaker.CfnNotebookInstanceLifecycleConfig(self, 'NeptuneWorkbenchLifeCycleConfig',
      notebook_instance_lifecycle_config_name='NeptuneWorkbenchLifeCycleConfig',
      on_start=[neptune_wb_lifecycle_config_prop]
    )

    neptune_workbench = aws_sagemaker.CfnNotebookInstance(self, 'NeptuneWorkbench',
      instance_type='ml.t2.medium',
      role_arn=sagemaker_notebook_role.role_arn,
      lifecycle_config_name=neptune_wb_lifecycle_config.notebook_instance_lifecycle_config_name,
      notebook_instance_name='NeptuneHolWorkbench',
      root_access='Disabled',
      security_group_ids=[sg_use_graph_db.security_group_id],
      subnet_id=graph_db_subnet_group.subnet_ids[0]
    )
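One optional addition, not in the original: exporting the cluster endpoint and port as stack outputs so clients outside the stack can discover them (same `cdk` alias as above; output IDs are illustrative):

    # Optional (assumption): surface the Neptune endpoint/port as CloudFormation outputs.
    cdk.CfnOutput(self, 'NeptuneClusterEndpoint', value=graph_db.attr_endpoint)
    cdk.CfnOutput(self, 'NeptuneClusterPort', value=graph_db.attr_port)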
Example 7
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        vpc = aws_ec2.Vpc(
            self,
            "OctemberVPC",
            max_azs=2,
            #      subnet_configuration=[{
            #          "cidrMask": 24,
            #          "name": "Public",
            #          "subnetType": aws_ec2.SubnetType.PUBLIC,
            #        },
            #        {
            #          "cidrMask": 24,
            #          "name": "Private",
            #          "subnetType": aws_ec2.SubnetType.PRIVATE
            #        },
            #        {
            #          "cidrMask": 28,
            #          "name": "Isolated",
            #          "subnetType": aws_ec2.SubnetType.ISOLATED,
            #          "reserved": True
            #        }
            #      ],
            gateway_endpoints={
                "S3":
                aws_ec2.GatewayVpcEndpointOptions(
                    service=aws_ec2.GatewayVpcEndpointAwsService.S3)
            })

        dynamo_db_endpoint = vpc.add_gateway_endpoint(
            "DynamoDbEndpoint",
            service=aws_ec2.GatewayVpcEndpointAwsService.DYNAMODB)

        s3_bucket = s3.Bucket(
            self,
            "s3bucket",
            bucket_name="octember-bizcard-{region}-{account}".format(
                region=core.Aws.REGION, account=core.Aws.ACCOUNT_ID))

        api = apigw.RestApi(
            self,
            "BizcardImageUploader",
            rest_api_name="BizcardImageUploader",
            description="This service serves uploading bizcard images into s3.",
            endpoint_types=[apigw.EndpointType.REGIONAL],
            binary_media_types=["image/png", "image/jpg"],
            deploy=True,
            deploy_options=apigw.StageOptions(stage_name="v1"))

        rest_api_role = aws_iam.Role(
            self,
            "ApiGatewayRoleForS3",
            role_name="ApiGatewayRoleForS3FullAccess",
            assumed_by=aws_iam.ServicePrincipal("apigateway.amazonaws.com"),
            managed_policies=[
                aws_iam.ManagedPolicy.from_aws_managed_policy_name(
                    "AmazonS3FullAccess")
            ])

        list_objects_responses = [
            apigw.IntegrationResponse(
                status_code="200",
                #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_apigateway/IntegrationResponse.html#aws_cdk.aws_apigateway.IntegrationResponse.response_parameters
                # The response parameters from the backend response that API Gateway sends to the method response.
                # Use the destination as the key and the source as the value:
                #  - The destination must be an existing response parameter in the MethodResponse property.
                #  - The source must be an existing method request parameter or a static value.
                response_parameters={
                    'method.response.header.Timestamp':
                    'integration.response.header.Date',
                    'method.response.header.Content-Length':
                    'integration.response.header.Content-Length',
                    'method.response.header.Content-Type':
                    'integration.response.header.Content-Type'
                }),
            apigw.IntegrationResponse(status_code="400",
                                      selection_pattern="4\d{2}"),
            apigw.IntegrationResponse(status_code="500",
                                      selection_pattern="5\d{2}")
        ]

        list_objects_integration_options = apigw.IntegrationOptions(
            credentials_role=rest_api_role,
            integration_responses=list_objects_responses)

        get_s3_integration = apigw.AwsIntegration(
            service="s3",
            integration_http_method="GET",
            path='/',
            options=list_objects_integration_options)

        api.root.add_method(
            "GET",
            get_s3_integration,
            authorization_type=apigw.AuthorizationType.IAM,
            api_key_required=False,
            method_responses=[
                apigw.MethodResponse(
                    status_code="200",
                    response_parameters={
                        'method.response.header.Timestamp': False,
                        'method.response.header.Content-Length': False,
                        'method.response.header.Content-Type': False
                    },
                    response_models={'application/json': apigw.EmptyModel()}),
                apigw.MethodResponse(status_code="400"),
                apigw.MethodResponse(status_code="500")
            ],
            request_parameters={'method.request.header.Content-Type': False})

        get_s3_folder_integration_options = apigw.IntegrationOptions(
            credentials_role=rest_api_role,
            integration_responses=list_objects_responses,
            #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_apigateway/IntegrationOptions.html#aws_cdk.aws_apigateway.IntegrationOptions.request_parameters
            # Specify request parameters as key-value pairs (string-to-string mappings), with a destination as the key and a source as the value.
            # The source must be an existing method request parameter or a static value.
            request_parameters={
                "integration.request.path.bucket": "method.request.path.folder"
            })

        get_s3_folder_integration = apigw.AwsIntegration(
            service="s3",
            integration_http_method="GET",
            path="{bucket}",
            options=get_s3_folder_integration_options)

        s3_folder = api.root.add_resource('{folder}')
        s3_folder.add_method(
            "GET",
            get_s3_folder_integration,
            authorization_type=apigw.AuthorizationType.IAM,
            api_key_required=False,
            method_responses=[
                apigw.MethodResponse(
                    status_code="200",
                    response_parameters={
                        'method.response.header.Timestamp': False,
                        'method.response.header.Content-Length': False,
                        'method.response.header.Content-Type': False
                    },
                    response_models={'application/json': apigw.EmptyModel()}),
                apigw.MethodResponse(status_code="400"),
                apigw.MethodResponse(status_code="500")
            ],
            request_parameters={
                'method.request.header.Content-Type': False,
                'method.request.path.folder': True
            })

        get_s3_item_integration_options = apigw.IntegrationOptions(
            credentials_role=rest_api_role,
            integration_responses=list_objects_responses,
            request_parameters={
                "integration.request.path.bucket":
                "method.request.path.folder",
                "integration.request.path.object": "method.request.path.item"
            })

        get_s3_item_integration = apigw.AwsIntegration(
            service="s3",
            integration_http_method="GET",
            path="{bucket}/{object}",
            options=get_s3_item_integration_options)

        s3_item = s3_folder.add_resource('{item}')
        s3_item.add_method(
            "GET",
            get_s3_item_integration,
            authorization_type=apigw.AuthorizationType.IAM,
            api_key_required=False,
            method_responses=[
                apigw.MethodResponse(
                    status_code="200",
                    response_parameters={
                        'method.response.header.Timestamp': False,
                        'method.response.header.Content-Length': False,
                        'method.response.header.Content-Type': False
                    },
                    response_models={'application/json': apigw.EmptyModel()}),
                apigw.MethodResponse(status_code="400"),
                apigw.MethodResponse(status_code="500")
            ],
            request_parameters={
                'method.request.header.Content-Type': False,
                'method.request.path.folder': True,
                'method.request.path.item': True
            })

        put_s3_item_integration_options = apigw.IntegrationOptions(
            credentials_role=rest_api_role,
            integration_responses=[
                apigw.IntegrationResponse(status_code="200"),
                apigw.IntegrationResponse(status_code="400",
                                          selection_pattern="4\d{2}"),
                apigw.IntegrationResponse(status_code="500",
                                          selection_pattern="5\d{2}")
            ],
            request_parameters={
                "integration.request.header.Content-Type":
                "method.request.header.Content-Type",
                "integration.request.path.bucket":
                "method.request.path.folder",
                "integration.request.path.object": "method.request.path.item"
            })

        put_s3_item_integration = apigw.AwsIntegration(
            service="s3",
            integration_http_method="PUT",
            path="{bucket}/{object}",
            options=put_s3_item_integration_options)

        s3_item.add_method(
            "PUT",
            put_s3_item_integration,
            authorization_type=apigw.AuthorizationType.IAM,
            api_key_required=False,
            method_responses=[
                apigw.MethodResponse(
                    status_code="200",
                    response_parameters={
                        'method.response.header.Content-Type': False
                    },
                    response_models={'application/json': apigw.EmptyModel()}),
                apigw.MethodResponse(status_code="400"),
                apigw.MethodResponse(status_code="500")
            ],
            request_parameters={
                'method.request.header.Content-Type': False,
                'method.request.path.folder': True,
                'method.request.path.item': True
            })

        ddb_table = dynamodb.Table(
            self,
            "BizcardImageMetaInfoDdbTable",
            table_name="OctemberBizcardImgMeta",
            partition_key=dynamodb.Attribute(
                name="image_id", type=dynamodb.AttributeType.STRING),
            billing_mode=dynamodb.BillingMode.PROVISIONED,
            read_capacity=15,
            write_capacity=5)

        img_kinesis_stream = kinesis.Stream(
            self, "BizcardImagePath", stream_name="octember-bizcard-image")

        # create lambda function
        trigger_textract_lambda_fn = _lambda.Function(
            self,
            "TriggerTextExtractorFromImage",
            runtime=_lambda.Runtime.PYTHON_3_7,
            function_name="TriggerTextExtractorFromImage",
            handler="trigger_text_extract_from_s3_image.lambda_handler",
            description="Trigger to extract text from an image in S3",
            code=_lambda.Code.asset(
                "./src/main/python/TriggerTextExtractFromS3Image"),
            environment={
                'REGION_NAME': core.Aws.REGION,
                'DDB_TABLE_NAME': ddb_table.table_name,
                'KINESIS_STREAM_NAME': img_kinesis_stream.stream_name
            },
            timeout=core.Duration.minutes(5))

        ddb_table_rw_policy_statement = aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            resources=[ddb_table.table_arn],
            actions=[
                "dynamodb:BatchGetItem", "dynamodb:Describe*",
                "dynamodb:List*", "dynamodb:GetItem", "dynamodb:Query",
                "dynamodb:Scan", "dynamodb:BatchWriteItem",
                "dynamodb:DeleteItem", "dynamodb:PutItem",
                "dynamodb:UpdateItem", "dax:Describe*", "dax:List*",
                "dax:GetItem", "dax:BatchGetItem", "dax:Query", "dax:Scan",
                "dax:BatchWriteItem", "dax:DeleteItem", "dax:PutItem",
                "dax:UpdateItem"
            ])

        trigger_textract_lambda_fn.add_to_role_policy(
            ddb_table_rw_policy_statement)
        trigger_textract_lambda_fn.add_to_role_policy(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=[img_kinesis_stream.stream_arn],
                                    actions=[
                                        "kinesis:Get*", "kinesis:List*",
                                        "kinesis:Describe*",
                                        "kinesis:PutRecord",
                                        "kinesis:PutRecords"
                                    ]))

        # assign notification for the s3 event type (ex: OBJECT_CREATED)
        s3_event_filter = s3.NotificationKeyFilter(prefix="bizcard-raw-img/",
                                                   suffix=".jpg")
        s3_event_source = S3EventSource(s3_bucket,
                                        events=[s3.EventType.OBJECT_CREATED],
                                        filters=[s3_event_filter])
        trigger_textract_lambda_fn.add_event_source(s3_event_source)

        #XXX: https://github.com/aws/aws-cdk/issues/2240
        # To avoid creating extra Lambda functions with names like LogRetentionaae0aa3c5b4d4f87b02d85b201efdd8a
        # if log_retention=aws_logs.RetentionDays.THREE_DAYS is added to the constructor props
        log_group = aws_logs.LogGroup(
            self,
            "TriggerTextractLogGroup",
            log_group_name="/aws/lambda/TriggerTextExtractorFromImage",
            retention=aws_logs.RetentionDays.THREE_DAYS)
        log_group.grant_write(trigger_textract_lambda_fn)

        text_kinesis_stream = kinesis.Stream(
            self, "BizcardTextData", stream_name="octember-bizcard-txt")

        textract_lambda_fn = _lambda.Function(
            self,
            "GetTextFromImage",
            runtime=_lambda.Runtime.PYTHON_3_7,
            function_name="GetTextFromImage",
            handler="get_text_from_s3_image.lambda_handler",
            description="extract text from an image in S3",
            code=_lambda.Code.asset("./src/main/python/GetTextFromS3Image"),
            environment={
                'REGION_NAME': core.Aws.REGION,
                'DDB_TABLE_NAME': ddb_table.table_name,
                'KINESIS_STREAM_NAME': text_kinesis_stream.stream_name
            },
            timeout=core.Duration.minutes(5))

        textract_lambda_fn.add_to_role_policy(ddb_table_rw_policy_statement)
        textract_lambda_fn.add_to_role_policy(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=[text_kinesis_stream.stream_arn],
                                    actions=[
                                        "kinesis:Get*", "kinesis:List*",
                                        "kinesis:Describe*",
                                        "kinesis:PutRecord",
                                        "kinesis:PutRecords"
                                    ]))

        textract_lambda_fn.add_to_role_policy(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    "resources": [
                        s3_bucket.bucket_arn, "{}/*".format(
                            s3_bucket.bucket_arn)
                    ],
                    "actions": [
                        "s3:AbortMultipartUpload", "s3:GetBucketLocation",
                        "s3:GetObject", "s3:ListBucket",
                        "s3:ListBucketMultipartUploads", "s3:PutObject"
                    ]
                }))

        textract_lambda_fn.add_to_role_policy(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=["*"],
                                    actions=["textract:*"]))

        img_kinesis_event_source = KinesisEventSource(
            img_kinesis_stream,
            batch_size=100,
            starting_position=_lambda.StartingPosition.LATEST)
        textract_lambda_fn.add_event_source(img_kinesis_event_source)

        log_group = aws_logs.LogGroup(
            self,
            "GetTextFromImageLogGroup",
            log_group_name="/aws/lambda/GetTextFromImage",
            retention=aws_logs.RetentionDays.THREE_DAYS)
        log_group.grant_write(textract_lambda_fn)

        sg_use_bizcard_es = aws_ec2.SecurityGroup(
            self,
            "BizcardSearchClientSG",
            vpc=vpc,
            allow_all_outbound=True,
            description=
            'security group for octember bizcard elasticsearch client',
            security_group_name='use-octember-bizcard-es')
        core.Tags.of(sg_use_bizcard_es).add('Name', 'use-octember-bizcard-es')

        sg_bizcard_es = aws_ec2.SecurityGroup(
            self,
            "BizcardSearchSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for octember bizcard elasticsearch',
            security_group_name='octember-bizcard-es')
        core.Tags.of(sg_bizcard_es).add('Name', 'octember-bizcard-es')

        sg_bizcard_es.add_ingress_rule(peer=sg_bizcard_es,
                                       connection=aws_ec2.Port.all_tcp(),
                                       description='octember-bizcard-es')
        sg_bizcard_es.add_ingress_rule(peer=sg_use_bizcard_es,
                                       connection=aws_ec2.Port.all_tcp(),
                                       description='use-octember-bizcard-es')

        sg_ssh_access = aws_ec2.SecurityGroup(
            self,
            "BastionHostSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for bastion host',
            security_group_name='octember-bastion-host-sg')
        core.Tags.of(sg_ssh_access).add('Name', 'octember-bastion-host')
        sg_ssh_access.add_ingress_rule(peer=aws_ec2.Peer.any_ipv4(),
                                       connection=aws_ec2.Port.tcp(22),
                                       description='ssh access')

        bastion_host = aws_ec2.BastionHostLinux(
            self,
            "BastionHost",
            vpc=vpc,
            instance_type=aws_ec2.InstanceType('t3.nano'),
            security_group=sg_ssh_access,
            subnet_selection=aws_ec2.SubnetSelection(
                subnet_type=aws_ec2.SubnetType.PUBLIC))
        bastion_host.instance.add_security_group(sg_use_bizcard_es)

        #XXX: aws cdk elasticsearch example - https://github.com/aws/aws-cdk/issues/2873
        es_cfn_domain = aws_elasticsearch.CfnDomain(
            self,
            'BizcardSearch',
            elasticsearch_cluster_config={
                "dedicatedMasterCount": 3,
                "dedicatedMasterEnabled": True,
                "dedicatedMasterType": "t2.medium.elasticsearch",
                "instanceCount": 2,
                "instanceType": "t2.medium.elasticsearch",
                "zoneAwarenessEnabled": True
            },
            ebs_options={
                "ebsEnabled": True,
                "volumeSize": 10,
                "volumeType": "gp2"
            },
            domain_name="octember-bizcard",
            elasticsearch_version="7.9",
            encryption_at_rest_options={"enabled": False},
            access_policies={
                "Version":
                "2012-10-17",
                "Statement": [{
                    "Effect":
                    "Allow",
                    "Principal": {
                        "AWS": "*"
                    },
                    "Action":
                    ["es:Describe*", "es:List*", "es:Get*", "es:ESHttp*"],
                    "Resource":
                    self.format_arn(service="es",
                                    resource="domain",
                                    resource_name="octember-bizcard/*")
                }]
            },
            snapshot_options={"automatedSnapshotStartHour": 17},
            vpc_options={
                "securityGroupIds": [sg_bizcard_es.security_group_id],
                "subnetIds":
                vpc.select_subnets(
                    subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids
            })
        core.Tags.of(es_cfn_domain).add('Name', 'octember-bizcard-es')

        s3_lib_bucket_name = self.node.try_get_context("lib_bucket_name")

        #XXX: https://github.com/aws/aws-cdk/issues/1342
        s3_lib_bucket = s3.Bucket.from_bucket_name(self, id,
                                                   s3_lib_bucket_name)
        es_lib_layer = _lambda.LayerVersion(
            self,
            "ESLib",
            layer_version_name="es-lib",
            compatible_runtimes=[_lambda.Runtime.PYTHON_3_7],
            code=_lambda.Code.from_bucket(s3_lib_bucket,
                                          "var/octember-es-lib.zip"))

        redis_lib_layer = _lambda.LayerVersion(
            self,
            "RedisLib",
            layer_version_name="redis-lib",
            compatible_runtimes=[_lambda.Runtime.PYTHON_3_7],
            code=_lambda.Code.from_bucket(s3_lib_bucket,
                                          "var/octember-redis-lib.zip"))

        #XXX: Deploy lambda in VPC - https://github.com/aws/aws-cdk/issues/1342
        upsert_to_es_lambda_fn = _lambda.Function(
            self,
            "UpsertBizcardToES",
            runtime=_lambda.Runtime.PYTHON_3_7,
            function_name="UpsertBizcardToElasticSearch",
            handler="upsert_bizcard_to_es.lambda_handler",
            description="Upsert bizcard text into elasticsearch",
            code=_lambda.Code.asset("./src/main/python/UpsertBizcardToES"),
            environment={
                'ES_HOST': es_cfn_domain.attr_domain_endpoint,
                'ES_INDEX': 'octember_bizcard',
                'ES_TYPE': 'bizcard'
            },
            timeout=core.Duration.minutes(5),
            layers=[es_lib_layer],
            security_groups=[sg_use_bizcard_es],
            vpc=vpc)

        text_kinesis_event_source = KinesisEventSource(
            text_kinesis_stream,
            batch_size=99,
            starting_position=_lambda.StartingPosition.LATEST)
        upsert_to_es_lambda_fn.add_event_source(text_kinesis_event_source)

        log_group = aws_logs.LogGroup(
            self,
            "UpsertBizcardToESLogGroup",
            log_group_name="/aws/lambda/UpsertBizcardToElasticSearch",
            retention=aws_logs.RetentionDays.THREE_DAYS)
        log_group.grant_write(upsert_to_es_lambda_fn)

        firehose_role_policy_doc = aws_iam.PolicyDocument()
        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    "resources": [
                        s3_bucket.bucket_arn, "{}/*".format(
                            s3_bucket.bucket_arn)
                    ],
                    "actions": [
                        "s3:AbortMultipartUpload", "s3:GetBucketLocation",
                        "s3:GetObject", "s3:ListBucket",
                        "s3:ListBucketMultipartUploads", "s3:PutObject"
                    ]
                }))

        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=["*"],
                                    actions=[
                                        "glue:GetTable",
                                        "glue:GetTableVersion",
                                        "glue:GetTableVersions"
                                    ]))

        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=[text_kinesis_stream.stream_arn],
                                    actions=[
                                        "kinesis:DescribeStream",
                                        "kinesis:GetShardIterator",
                                        "kinesis:GetRecords"
                                    ]))

        firehose_log_group_name = "/aws/kinesisfirehose/octember-bizcard-txt-to-s3"
        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                effect=aws_iam.Effect.ALLOW,
                #XXX: The ARN will be formatted as follows:
                # arn:{partition}:{service}:{region}:{account}:{resource}{sep}{resource-name}
                resources=[
                    self.format_arn(service="logs",
                                    resource="log-group",
                                    resource_name="{}:log-stream:*".format(
                                        firehose_log_group_name),
                                    sep=":")
                ],
                actions=["logs:PutLogEvents"]))

        firehose_role = aws_iam.Role(
            self,
            "FirehoseDeliveryRole",
            role_name="FirehoseDeliveryRole",
            assumed_by=aws_iam.ServicePrincipal("firehose.amazonaws.com"),
            #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
            inline_policies={"firehose_role_policy": firehose_role_policy_doc})

        bizcard_text_to_s3_delivery_stream = aws_kinesisfirehose.CfnDeliveryStream(
            self,
            "BizcardTextToS3",
            delivery_stream_name="octember-bizcard-txt-to-s3",
            delivery_stream_type="KinesisStreamAsSource",
            kinesis_stream_source_configuration={
                "kinesisStreamArn": text_kinesis_stream.stream_arn,
                "roleArn": firehose_role.role_arn
            },
            extended_s3_destination_configuration={
                "bucketArn": s3_bucket.bucket_arn,
                "bufferingHints": {
                    "intervalInSeconds": 60,
                    "sizeInMBs": 1
                },
                "cloudWatchLoggingOptions": {
                    "enabled": True,
                    "logGroupName": firehose_log_group_name,
                    "logStreamName": "S3Delivery"
                },
                "compressionFormat": "GZIP",
                "prefix": "bizcard-text/",
                "roleArn": firehose_role.role_arn
            })

        sg_use_bizcard_es_cache = aws_ec2.SecurityGroup(
            self,
            "BizcardSearchCacheClientSG",
            vpc=vpc,
            allow_all_outbound=True,
            description=
            'security group for octember bizcard search query cache client',
            security_group_name='use-octember-bizcard-es-cache')
        core.Tags.of(sg_use_bizcard_es_cache).add(
            'Name', 'use-octember-bizcard-es-cache')

        sg_bizcard_es_cache = aws_ec2.SecurityGroup(
            self,
            "BizcardSearchCacheSG",
            vpc=vpc,
            allow_all_outbound=True,
            description=
            'security group for octember bizcard search query cache',
            security_group_name='octember-bizcard-es-cache')
        core.Tags.of(sg_bizcard_es_cache).add('Name',
                                              'octember-bizcard-es-cache')

        sg_bizcard_es_cache.add_ingress_rule(
            peer=sg_use_bizcard_es_cache,
            connection=aws_ec2.Port.tcp(6379),
            description='use-octember-bizcard-es-cache')

        es_query_cache_subnet_group = aws_elasticache.CfnSubnetGroup(
            self,
            "QueryCacheSubnetGroup",
            description="subnet group for octember-bizcard-es-cache",
            subnet_ids=vpc.select_subnets(
                subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids,
            cache_subnet_group_name='octember-bizcard-es-cache')

        es_query_cache = aws_elasticache.CfnCacheCluster(
            self,
            "BizcardSearchQueryCache",
            cache_node_type="cache.t3.small",
            num_cache_nodes=1,
            engine="redis",
            engine_version="5.0.5",
            auto_minor_version_upgrade=False,
            cluster_name="octember-bizcard-es-cache",
            snapshot_retention_limit=3,
            snapshot_window="17:00-19:00",
            preferred_maintenance_window="mon:19:00-mon:20:30",
            #XXX: Do not use a reference for "cache_subnet_group_name" - https://github.com/aws/aws-cdk/issues/3098
            #cache_subnet_group_name=es_query_cache_subnet_group.cache_subnet_group_name, # Redis cluster goes to wrong VPC
            cache_subnet_group_name='octember-bizcard-es-cache',
            vpc_security_group_ids=[sg_bizcard_es_cache.security_group_id])

        #XXX: If you're going to launch your cluster in an Amazon VPC, you need to create a subnet group before you start creating a cluster.
        # https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-elasticache-cache-cluster.html#cfn-elasticache-cachecluster-cachesubnetgroupname
        es_query_cache.add_depends_on(es_query_cache_subnet_group)

        #XXX: add more than 2 security groups
        # https://github.com/aws/aws-cdk/blob/ea10f0d141a48819ec0000cd7905feda993870a9/packages/%40aws-cdk/aws-lambda/lib/function.ts#L387
        # https://github.com/aws/aws-cdk/issues/1555
        # https://github.com/aws/aws-cdk/pull/5049
        bizcard_search_lambda_fn = _lambda.Function(
            self,
            "BizcardSearchServer",
            runtime=_lambda.Runtime.PYTHON_3_7,
            function_name="BizcardSearchProxy",
            handler="es_search_bizcard.lambda_handler",
            description="Proxy server to search bizcard text",
            code=_lambda.Code.asset("./src/main/python/SearchBizcard"),
            environment={
                'ES_HOST': es_cfn_domain.attr_domain_endpoint,
                'ES_INDEX': 'octember_bizcard',
                'ES_TYPE': 'bizcard',
                'ELASTICACHE_HOST': es_query_cache.attr_redis_endpoint_address
            },
            timeout=core.Duration.minutes(1),
            layers=[es_lib_layer, redis_lib_layer],
            security_groups=[sg_use_bizcard_es, sg_use_bizcard_es_cache],
            vpc=vpc)

        #XXX: create API Gateway + LambdaProxy
        search_api = apigw.LambdaRestApi(
            self,
            "BizcardSearchAPI",
            handler=bizcard_search_lambda_fn,
            proxy=False,
            rest_api_name="BizcardSearch",
            description="This service serves searching bizcard text.",
            endpoint_types=[apigw.EndpointType.REGIONAL],
            deploy=True,
            deploy_options=apigw.StageOptions(stage_name="v1"))

        bizcard_search = search_api.root.add_resource('search')
        bizcard_search.add_method(
            "GET",
            method_responses=[
                apigw.MethodResponse(
                    status_code="200",
                    response_models={'application/json': apigw.EmptyModel()}),
                apigw.MethodResponse(status_code="400"),
                apigw.MethodResponse(status_code="500")
            ])

        sg_use_bizcard_graph_db = aws_ec2.SecurityGroup(
            self,
            "BizcardGraphDbClientSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for octember bizcard graph db client',
            security_group_name='use-octember-bizcard-neptune')
        core.Tags.of(sg_use_bizcard_graph_db).add(
            'Name', 'use-octember-bizcard-neptune')

        sg_bizcard_graph_db = aws_ec2.SecurityGroup(
            self,
            "BizcardGraphDbSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for octember bizcard graph db',
            security_group_name='octember-bizcard-neptune')
        core.Tags.of(sg_bizcard_graph_db).add('Name',
                                              'octember-bizcard-neptune')

        sg_bizcard_graph_db.add_ingress_rule(
            peer=sg_bizcard_graph_db,
            connection=aws_ec2.Port.tcp(8182),
            description='octember-bizcard-neptune')
        sg_bizcard_graph_db.add_ingress_rule(
            peer=sg_use_bizcard_graph_db,
            connection=aws_ec2.Port.tcp(8182),
            description='use-octember-bizcard-neptune')

        bizcard_graph_db_subnet_group = aws_neptune.CfnDBSubnetGroup(
            self,
            "NeptuneSubnetGroup",
            db_subnet_group_description=
            "subnet group for octember-bizcard-neptune",
            subnet_ids=vpc.select_subnets(
                subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids,
            db_subnet_group_name='octember-bizcard-neptune')

        bizcard_graph_db = aws_neptune.CfnDBCluster(
            self,
            "BizcardGraphDB",
            availability_zones=vpc.availability_zones,
            db_subnet_group_name=bizcard_graph_db_subnet_group.
            db_subnet_group_name,
            db_cluster_identifier="octember-bizcard",
            backup_retention_period=1,
            preferred_backup_window="08:45-09:15",
            preferred_maintenance_window="sun:18:00-sun:18:30",
            vpc_security_group_ids=[sg_bizcard_graph_db.security_group_id])
        bizcard_graph_db.add_depends_on(bizcard_graph_db_subnet_group)

        bizcard_graph_db_instance = aws_neptune.CfnDBInstance(
            self,
            "BizcardGraphDBInstance",
            db_instance_class="db.r5.large",
            allow_major_version_upgrade=False,
            auto_minor_version_upgrade=False,
            availability_zone=vpc.availability_zones[0],
            db_cluster_identifier=bizcard_graph_db.db_cluster_identifier,
            db_instance_identifier="octember-bizcard",
            preferred_maintenance_window="sun:18:00-sun:18:30")
        bizcard_graph_db_instance.add_depends_on(bizcard_graph_db)

        bizcard_graph_db_replica_instance = aws_neptune.CfnDBInstance(
            self,
            "BizcardGraphDBReplicaInstance",
            db_instance_class="db.r5.large",
            allow_major_version_upgrade=False,
            auto_minor_version_upgrade=False,
            availability_zone=vpc.availability_zones[-1],
            db_cluster_identifier=bizcard_graph_db.db_cluster_identifier,
            db_instance_identifier="octember-bizcard-replica",
            preferred_maintenance_window="sun:18:00-sun:18:30")
        bizcard_graph_db_replica_instance.add_depends_on(bizcard_graph_db)
        bizcard_graph_db_replica_instance.add_depends_on(
            bizcard_graph_db_instance)

        gremlinpython_lib_layer = _lambda.LayerVersion(
            self,
            "GremlinPythonLib",
            layer_version_name="gremlinpython-lib",
            compatible_runtimes=[_lambda.Runtime.PYTHON_3_7],
            code=_lambda.Code.from_bucket(
                s3_lib_bucket, "var/octember-gremlinpython-lib.zip"))

        #XXX: https://github.com/aws/aws-cdk/issues/1342
        upsert_to_neptune_lambda_fn = _lambda.Function(
            self,
            "UpsertBizcardToGraphDB",
            runtime=_lambda.Runtime.PYTHON_3_7,
            function_name="UpsertBizcardToNeptune",
            handler="upsert_bizcard_to_graph_db.lambda_handler",
            description="Upsert bizcard into neptune",
            code=_lambda.Code.asset(
                "./src/main/python/UpsertBizcardToGraphDB"),
            environment={
                'REGION_NAME': core.Aws.REGION,
                'NEPTUNE_ENDPOINT': bizcard_graph_db.attr_endpoint,
                'NEPTUNE_PORT': bizcard_graph_db.attr_port
            },
            timeout=core.Duration.minutes(5),
            layers=[gremlinpython_lib_layer],
            security_groups=[sg_use_bizcard_graph_db],
            vpc=vpc)

        upsert_to_neptune_lambda_fn.add_event_source(text_kinesis_event_source)

        log_group = aws_logs.LogGroup(
            self,
            "UpsertBizcardToGraphDBLogGroup",
            log_group_name="/aws/lambda/UpsertBizcardToNeptune",
            retention=aws_logs.RetentionDays.THREE_DAYS)
        log_group.grant_write(upsert_to_neptune_lambda_fn)

        sg_use_bizcard_neptune_cache = aws_ec2.SecurityGroup(
            self,
            "BizcardNeptuneCacheClientSG",
            vpc=vpc,
            allow_all_outbound=True,
            description=
            'security group for octember bizcard recommendation query cache client',
            security_group_name='use-octember-bizcard-neptune-cache')
        core.Tags.of(sg_use_bizcard_neptune_cache).add(
            'Name', 'use-octember-bizcard-neptune-cache')

        sg_bizcard_neptune_cache = aws_ec2.SecurityGroup(
            self,
            "BizcardNeptuneCacheSG",
            vpc=vpc,
            allow_all_outbound=True,
            description=
            'security group for octember bizcard recommendation query cache',
            security_group_name='octember-bizcard-neptune-cache')
        core.Tags.of(sg_bizcard_neptune_cache).add(
            'Name', 'octember-bizcard-neptune-cache')

        sg_bizcard_neptune_cache.add_ingress_rule(
            peer=sg_use_bizcard_neptune_cache,
            connection=aws_ec2.Port.tcp(6379),
            description='use-octember-bizcard-neptune-cache')

        recomm_query_cache_subnet_group = aws_elasticache.CfnSubnetGroup(
            self,
            "RecommQueryCacheSubnetGroup",
            description="subnet group for octember-bizcard-neptune-cache",
            subnet_ids=vpc.select_subnets(
                subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids,
            cache_subnet_group_name='octember-bizcard-neptune-cache')

        recomm_query_cache = aws_elasticache.CfnCacheCluster(
            self,
            "BizcardRecommQueryCache",
            cache_node_type="cache.t3.small",
            num_cache_nodes=1,
            engine="redis",
            engine_version="5.0.5",
            auto_minor_version_upgrade=False,
            cluster_name="octember-bizcard-neptune-cache",
            snapshot_retention_limit=3,
            snapshot_window="17:00-19:00",
            preferred_maintenance_window="mon:19:00-mon:20:30",
            #XXX: Do not use a reference for "cache_subnet_group_name" - https://github.com/aws/aws-cdk/issues/3098
            #cache_subnet_group_name=recomm_query_cache_subnet_group.cache_subnet_group_name, # Redis cluster goes to wrong VPC
            cache_subnet_group_name='octember-bizcard-neptune-cache',
            vpc_security_group_ids=[
                sg_bizcard_neptune_cache.security_group_id
            ])

        recomm_query_cache.add_depends_on(recomm_query_cache_subnet_group)

        bizcard_recomm_lambda_fn = _lambda.Function(
            self,
            "BizcardRecommender",
            runtime=_lambda.Runtime.PYTHON_3_7,
            function_name="BizcardRecommender",
            handler="neptune_recommend_bizcard.lambda_handler",
            description="This service serves PYMK(People You May Know).",
            code=_lambda.Code.asset("./src/main/python/RecommendBizcard"),
            environment={
                'REGION_NAME': core.Aws.REGION,
                'NEPTUNE_ENDPOINT': bizcard_graph_db.attr_read_endpoint,
                'NEPTUNE_PORT': bizcard_graph_db.attr_port,
                'ELASTICACHE_HOST':
                recomm_query_cache.attr_redis_endpoint_address
            },
            timeout=core.Duration.minutes(1),
            layers=[gremlinpython_lib_layer, redis_lib_layer],
            security_groups=[
                sg_use_bizcard_graph_db, sg_use_bizcard_neptune_cache
            ],
            vpc=vpc)

        #XXX: create API Gateway + LambdaProxy
        recomm_api = apigw.LambdaRestApi(
            self,
            "BizcardRecommendAPI",
            handler=bizcard_recomm_lambda_fn,
            proxy=False,
            rest_api_name="BizcardRecommend",
            description="This service serves PYMK(People You May Know).",
            endpoint_types=[apigw.EndpointType.REGIONAL],
            deploy=True,
            deploy_options=apigw.StageOptions(stage_name="v1"))

        bizcard_recomm = recomm_api.root.add_resource('pymk')
        bizcard_recomm.add_method(
            "GET",
            method_responses=[
                apigw.MethodResponse(
                    status_code="200",
                    response_models={'application/json': apigw.EmptyModel()}),
                apigw.MethodResponse(status_code="400"),
                apigw.MethodResponse(status_code="500")
            ])

        sagemaker_notebook_role_policy_doc = aws_iam.PolicyDocument()
        sagemaker_notebook_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    "resources": [
                        "arn:aws:s3:::aws-neptune-notebook",
                        "arn:aws:s3:::aws-neptune-notebook/*"
                    ],
                    "actions": ["s3:GetObject", "s3:ListBucket"]
                }))

        sagemaker_notebook_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    "resources": [
                        "arn:aws:neptune-db:{region}:{account}:{cluster_id}/*".
                        format(region=core.Aws.REGION,
                               account=core.Aws.ACCOUNT_ID,
                               cluster_id=bizcard_graph_db.
                               attr_cluster_resource_id)
                    ],
                    "actions": ["neptune-db:connect"]
                }))

        sagemaker_notebook_role = aws_iam.Role(
            self,
            'SageMakerNotebookForNeptuneWorkbenchRole',
            role_name='AWSNeptuneNotebookRole-OctemberBizcard',
            assumed_by=aws_iam.ServicePrincipal('sagemaker.amazonaws.com'),
            #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
            inline_policies={
                'AWSNeptuneNotebook': sagemaker_notebook_role_policy_doc
            })

        neptune_wb_lifecycle_content = '''#!/bin/bash
sudo -u ec2-user -i <<'EOF'
echo "export GRAPH_NOTEBOOK_AUTH_MODE=DEFAULT" >> ~/.bashrc
echo "export GRAPH_NOTEBOOK_HOST={NeptuneClusterEndpoint}" >> ~/.bashrc
echo "export GRAPH_NOTEBOOK_PORT={NeptuneClusterPort}" >> ~/.bashrc
echo "export NEPTUNE_LOAD_FROM_S3_ROLE_ARN=''" >> ~/.bashrc
echo "export AWS_REGION={AWS_Region}" >> ~/.bashrc
aws s3 cp s3://aws-neptune-notebook/graph_notebook.tar.gz /tmp/graph_notebook.tar.gz
rm -rf /tmp/graph_notebook
tar -zxvf /tmp/graph_notebook.tar.gz -C /tmp
/tmp/graph_notebook/install.sh
EOF
'''.format(NeptuneClusterEndpoint=bizcard_graph_db.attr_endpoint,
           NeptuneClusterPort=bizcard_graph_db.attr_port,
           AWS_Region=core.Aws.REGION)

        neptune_wb_lifecycle_config_prop = aws_sagemaker.CfnNotebookInstanceLifecycleConfig.NotebookInstanceLifecycleHookProperty(
            content=core.Fn.base64(neptune_wb_lifecycle_content))
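        # CloudFormation expects the lifecycle hook content as a base64-encoded
        # string, hence the Fn.base64 wrapping above.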

        neptune_wb_lifecycle_config = aws_sagemaker.CfnNotebookInstanceLifecycleConfig(
            self,
            'NeptuneWorkbenchLifeCycleConfig',
            notebook_instance_lifecycle_config_name=
            'AWSNeptuneWorkbenchOctemberBizcardLCConfig',
            on_start=[neptune_wb_lifecycle_config_prop])

        neptune_workbench = aws_sagemaker.CfnNotebookInstance(
            self,
            'NeptuneWorkbench',
            instance_type='ml.t2.medium',
            role_arn=sagemaker_notebook_role.role_arn,
            lifecycle_config_name=neptune_wb_lifecycle_config.
            notebook_instance_lifecycle_config_name,
            notebook_instance_name='OctemberBizcard-NeptuneWorkbench',
            root_access='Disabled',
            security_group_ids=[sg_use_bizcard_graph_db.security_group_id],
            subnet_id=bizcard_graph_db_subnet_group.subnet_ids[0])
Esempio n. 8
0
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # The code that defines your stack goes here
        vpc = aws_ec2.Vpc(
            self,
            "AnalyticsWorkshopVPC",
            max_azs=2,
            gateway_endpoints={
                "S3":
                aws_ec2.GatewayVpcEndpointOptions(
                    service=aws_ec2.GatewayVpcEndpointAwsService.S3)
            })

        sg_bastion_host = aws_ec2.SecurityGroup(
            self,
            "BastionHostSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for a bastion host',
            security_group_name='bastion-host-sg')
        core.Tag.add(sg_bastion_host, 'Name', 'bastion-host-sg')

        #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceClass.html
        #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceSize.html#aws_cdk.aws_ec2.InstanceSize
        ec2_instance_type = aws_ec2.InstanceType.of(
            aws_ec2.InstanceClass.BURSTABLE3, aws_ec2.InstanceSize.MEDIUM)

        #XXX: As there are no SSH public keys deployed on this machine,
        # you need to use EC2 Instance Connect with the command
        #  'aws ec2-instance-connect send-ssh-public-key' to provide your SSH public key.
        # https://aws.amazon.com/de/blogs/compute/new-using-amazon-ec2-instance-connect-for-ssh-access-to-your-ec2-instances/
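        # For example (the instance id, AZ, and key path below are placeholders):
        #   aws ec2-instance-connect send-ssh-public-key \
        #     --instance-id i-0123456789abcdef0 --availability-zone us-east-1a \
        #     --instance-os-user ec2-user --ssh-public-key file://~/.ssh/id_rsa.pub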
        bastion_host = aws_ec2.BastionHostLinux(
            self,
            "BastionHost",
            vpc=vpc,
            instance_type=ec2_instance_type,
            subnet_selection=aws_ec2.SubnetSelection(
                subnet_type=aws_ec2.SubnetType.PUBLIC),
            security_group=sg_bastion_host)

        #TODO: SHOULD restrict the IP range allowed for SSH access
        bastion_host.allow_ssh_access_from(aws_ec2.Peer.ipv4("0.0.0.0/0"))

        #XXX: In order to test data pipeline, add {Kinesis, KinesisFirehose}FullAccess Policy to the bastion host.
        bastion_host.role.add_to_policy(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=["*"],
                                    actions=["kinesis:*"]))
        bastion_host.role.add_to_policy(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=["*"],
                                    actions=["firehose:*"]))

        sg_use_es = aws_ec2.SecurityGroup(
            self,
            "ElasticSearchClientSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for an elasticsearch client',
            security_group_name='use-es-cluster-sg')
        core.Tag.add(sg_use_es, 'Name', 'use-es-cluster-sg')

        sg_es = aws_ec2.SecurityGroup(
            self,
            "ElasticSearchSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for an elasticsearch cluster',
            security_group_name='es-cluster-sg')
        core.Tag.add(sg_es, 'Name', 'es-cluster-sg')

        sg_es.add_ingress_rule(peer=sg_es,
                               connection=aws_ec2.Port.all_tcp(),
                               description='es-cluster-sg')
        sg_es.add_ingress_rule(peer=sg_use_es,
                               connection=aws_ec2.Port.all_tcp(),
                               description='use-es-cluster-sg')
        sg_es.add_ingress_rule(peer=sg_bastion_host,
                               connection=aws_ec2.Port.all_tcp(),
                               description='bastion-host-sg')

        s3_bucket = s3.Bucket(
            self,
            "s3bucket",
            bucket_name="aws-analytics-immersion-day-{region}-{account}".
            format(region=kwargs['env'].region, account=kwargs['env'].account))

        trans_kinesis_stream = kinesis.Stream(
            self,
            "AnalyticsWorkshopKinesisStreams",
            stream_name='retail-trans')

        firehose_role_policy_doc = aws_iam.PolicyDocument()
        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    "resources": [
                        s3_bucket.bucket_arn, "{}/*".format(
                            s3_bucket.bucket_arn)
                    ],
                    "actions": [
                        "s3:AbortMultipartUpload", "s3:GetBucketLocation",
                        "s3:GetObject", "s3:ListBucket",
                        "s3:ListBucketMultipartUploads", "s3:PutObject"
                    ]
                }))

        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=["*"],
                                    actions=[
                                        "glue:GetTable",
                                        "glue:GetTableVersion",
                                        "glue:GetTableVersions"
                                    ]))

        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                effect=aws_iam.Effect.ALLOW,
                resources=[trans_kinesis_stream.stream_arn],
                actions=[
                    "kinesis:DescribeStream", "kinesis:GetShardIterator",
                    "kinesis:GetRecords"
                ]))

        firehose_log_group_name = "/aws/kinesisfirehose/retail-trans"
        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                effect=aws_iam.Effect.ALLOW,
                #XXX: The ARN will be formatted as follows:
                # arn:{partition}:{service}:{region}:{account}:{resource}{sep}{resource-name}
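                # e.g. arn:aws:logs:us-east-1:123456789012:log-group:/aws/kinesisfirehose/retail-trans:log-stream:*
                # (the region and account above are illustrative)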
                resources=[
                    self.format_arn(service="logs",
                                    resource="log-group",
                                    resource_name="{}:log-stream:*".format(
                                        firehose_log_group_name),
                                    sep=":")
                ],
                actions=["logs:PutLogEvents"]))

        firehose_role = aws_iam.Role(
            self,
            "FirehoseDeliveryRole",
            role_name="FirehoseDeliveryRole",
            assumed_by=aws_iam.ServicePrincipal("firehose.amazonaws.com"),
            #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
            inline_policies={"firehose_role_policy": firehose_role_policy_doc})

        trans_to_s3_delivery_stream = aws_kinesisfirehose.CfnDeliveryStream(
            self,
            "KinesisFirehoseToS3",
            delivery_stream_name="retail-trans",
            delivery_stream_type="KinesisStreamAsSource",
            kinesis_stream_source_configuration={
                "kinesisStreamArn": trans_kinesis_stream.stream_arn,
                "roleArn": firehose_role.role_arn
            },
            extended_s3_destination_configuration={
                "bucketArn": s3_bucket.bucket_arn,
                "bufferingHints": {
                    "intervalInSeconds": 60,
                    "sizeInMBs": 1
                },
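                # Firehose flushes a batch to S3 when 1 MiB accumulates or
                # 60 seconds elapse, whichever happens first.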
                "cloudWatchLoggingOptions": {
                    "enabled": True,
                    "logGroupName": firehose_log_group_name,
                    "logStreamName": "S3Delivery"
                },
                "compressionFormat":
                "UNCOMPRESSED",  # [GZIP | HADOOP_SNAPPY | Snappy | UNCOMPRESSED | ZIP]
                "prefix":
                "json-data/year=!{timestamp:yyyy}/month=!{timestamp:MM}/day=!{timestamp:dd}/hour=!{timestamp:HH}/",
                "errorOutputPrefix":
                "error-json/year=!{timestamp:yyyy}/month=!{timestamp:MM}/day=!{timestamp:dd}/hour=!{timestamp:HH}/!{firehose:error-output-type}",
                "roleArn": firehose_role.role_arn
            })

        #XXX: aws cdk elasticsearch example - https://github.com/aws/aws-cdk/issues/2873
        es_domain_name = 'retail'
        es_cfn_domain = aws_elasticsearch.CfnDomain(
            self,
            "ElasticSearch",
            elasticsearch_cluster_config={
                "dedicatedMasterCount": 3,
                "dedicatedMasterEnabled": True,
                "dedicatedMasterType": "t2.medium.elasticsearch",
                "instanceCount": 2,
                "instanceType": "t2.medium.elasticsearch",
                "zoneAwarenessEnabled": True
            },
            ebs_options={
                "ebsEnabled": True,
                "volumeSize": 10,
                "volumeType": "gp2"
            },
            domain_name=es_domain_name,
            elasticsearch_version="7.4",
            encryption_at_rest_options={"enabled": False},
            access_policies={
                "Version":
                "2012-10-17",
                "Statement": [{
                    "Effect":
                    "Allow",
                    "Principal": {
                        "AWS": "*"
                    },
                    "Action":
                    ["es:Describe*", "es:List*", "es:Get*", "es:ESHttp*"],
                    "Resource":
                    self.format_arn(
                        service="es",
                        resource="domain",
                        resource_name="{}/*".format(es_domain_name))
                }]
            },
            snapshot_options={"automatedSnapshotStartHour": 17},
            vpc_options={
                "securityGroupIds": [sg_es.security_group_id],
                "subnetIds":
                vpc.select_subnets(
                    subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids
            })
        core.Tag.add(es_cfn_domain, 'Name', 'analytics-workshop-es')

        #XXX: https://github.com/aws/aws-cdk/issues/1342
        s3_lib_bucket = s3.Bucket.from_bucket_name(self, id,
                                                   S3_BUCKET_LAMBDA_LAYER_LIB)
        es_lib_layer = _lambda.LayerVersion(
            self,
            "ESLib",
            layer_version_name="es-lib",
            compatible_runtimes=[_lambda.Runtime.PYTHON_3_7],
            code=_lambda.Code.from_bucket(s3_lib_bucket, "var/es-lib.zip"))

        #XXX: add more than 2 security groups
        # https://github.com/aws/aws-cdk/blob/ea10f0d141a48819ec0000cd7905feda993870a9/packages/%40aws-cdk/aws-lambda/lib/function.ts#L387
        # https://github.com/aws/aws-cdk/issues/1555
        # https://github.com/aws/aws-cdk/pull/5049
        #XXX: Deploy lambda in VPC - https://github.com/aws/aws-cdk/issues/1342
        upsert_to_es_lambda_fn = _lambda.Function(
            self,
            "UpsertToES",
            runtime=_lambda.Runtime.PYTHON_3_7,
            function_name="UpsertToES",
            handler="upsert_to_es.lambda_handler",
            description="Upsert records into elasticsearch",
            code=_lambda.Code.asset("./src/main/python/UpsertToES"),
            environment={
                'ES_HOST': es_cfn_domain.attr_domain_endpoint,
                #TODO: MUST set appropriate environment variables for your workloads.
                'ES_INDEX': 'retail',
                'ES_TYPE': 'trans',
                'REQUIRED_FIELDS': 'Invoice,StockCode,Customer_ID',
                'REGION_NAME': kwargs['env'].region,
                'DATE_TYPE_FIELDS': 'InvoiceDate'
            },
            timeout=core.Duration.minutes(5),
            layers=[es_lib_layer],
            security_groups=[sg_use_es],
            vpc=vpc)

        trans_kinesis_event_source = KinesisEventSource(
            trans_kinesis_stream,
            batch_size=1000,
            starting_position=_lambda.StartingPosition.LATEST)
        upsert_to_es_lambda_fn.add_event_source(trans_kinesis_event_source)
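        # The Lambda service polls the Kinesis stream and invokes UpsertToES with
        # up to 1,000 records per batch, starting from the newest records (LATEST).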

        log_group = aws_logs.LogGroup(
            self,
            "UpsertToESLogGroup",
            log_group_name="/aws/lambda/UpsertToES",
            retention=aws_logs.RetentionDays.THREE_DAYS)
        log_group.grant_write(upsert_to_es_lambda_fn)

        merge_small_files_lambda_fn = _lambda.Function(
            self,
            "MergeSmallFiles",
            runtime=_lambda.Runtime.PYTHON_3_7,
            function_name="MergeSmallFiles",
            handler="athena_ctas.lambda_handler",
            description="Merge small files in S3",
            code=_lambda.Code.asset("./src/main/python/MergeSmallFiles"),
            environment={
                #TODO: MUST set appropriate environment variables for your workloads.
                'OLD_DATABASE':
                'mydatabase',
                'OLD_TABLE_NAME':
                'retail_trans_json',
                'NEW_DATABASE':
                'mydatabase',
                'NEW_TABLE_NAME':
                'ctas_retail_trans_parquet',
                'WORK_GROUP':
                'primary',
                'OUTPUT_PREFIX':
                's3://{}'.format(
                    os.path.join(s3_bucket.bucket_name,
                                 'parquet-retail-trans')),
                'STAGING_OUTPUT_PREFIX':
                's3://{}'.format(os.path.join(s3_bucket.bucket_name, 'tmp')),
                'COLUMN_NAMES':
                'invoice,stockcode,description,quantity,invoicedate,price,customer_id,country',
            },
            timeout=core.Duration.minutes(5))

        merge_small_files_lambda_fn.add_to_role_policy(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=["*"],
                                    actions=["athena:*"]))
        merge_small_files_lambda_fn.add_to_role_policy(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=["*"],
                                    actions=[
                                        "s3:Get*",
                                        "s3:List*",
                                        "s3:AbortMultipartUpload",
                                        "s3:PutObject",
                                    ]))
        merge_small_files_lambda_fn.add_to_role_policy(
            aws_iam.PolicyStatement(
                effect=aws_iam.Effect.ALLOW,
                resources=["*"],
                actions=[
                    "glue:CreateDatabase", "glue:DeleteDatabase",
                    "glue:GetDatabase", "glue:GetDatabases",
                    "glue:UpdateDatabase", "glue:CreateTable",
                    "glue:DeleteTable", "glue:BatchDeleteTable",
                    "glue:UpdateTable", "glue:GetTable", "glue:GetTables",
                    "glue:BatchCreatePartition", "glue:CreatePartition",
                    "glue:DeletePartition", "glue:BatchDeletePartition",
                    "glue:UpdatePartition", "glue:GetPartition",
                    "glue:GetPartitions", "glue:BatchGetPartition"
                ]))
        merge_small_files_lambda_fn.add_to_role_policy(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=["*"],
                                    actions=["lakeformation:GetDataAccess"]))

        lambda_fn_target = aws_events_targets.LambdaFunction(
            merge_small_files_lambda_fn)
        aws_events.Rule(self,
                        "ScheduleRule",
                        schedule=aws_events.Schedule.cron(minute="5"),
                        targets=[lambda_fn_target])
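        # With only minute="5" specified, the rule fires at 5 minutes past every hour.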

        log_group = aws_logs.LogGroup(
            self,
            "MergeSmallFilesLogGroup",
            log_group_name="/aws/lambda/MergeSmallFiles",
            retention=aws_logs.RetentionDays.THREE_DAYS)
        log_group.grant_write(merge_small_files_lambda_fn)
Esempio n. 9
0
    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        vpc = aws_ec2.Vpc(
            self,
            "ElasticsearchHolVPC",
            max_azs=2,
            gateway_endpoints={
                "S3":
                aws_ec2.GatewayVpcEndpointOptions(
                    service=aws_ec2.GatewayVpcEndpointAwsService.S3)
            })

        #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceClass.html
        #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceSize.html#aws_cdk.aws_ec2.InstanceSize
        ec2_instance_type = aws_ec2.InstanceType.of(
            aws_ec2.InstanceClass.BURSTABLE3, aws_ec2.InstanceSize.MEDIUM)

        sg_bastion_host = aws_ec2.SecurityGroup(
            self,
            "BastionHostSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for a bastion host',
            security_group_name='bastion-host-sg')
        cdk.Tags.of(sg_bastion_host).add('Name', 'bastion-host-sg')

        #XXX: As there are no SSH public keys deployed on this machine,
        # you need to use EC2 Instance Connect with the command
        #  'aws ec2-instance-connect send-ssh-public-key' to provide your SSH public key.
        # https://aws.amazon.com/de/blogs/compute/new-using-amazon-ec2-instance-connect-for-ssh-access-to-your-ec2-instances/
        bastion_host = aws_ec2.BastionHostLinux(
            self,
            "BastionHost",
            vpc=vpc,
            instance_type=ec2_instance_type,
            subnet_selection=aws_ec2.SubnetSelection(
                subnet_type=aws_ec2.SubnetType.PUBLIC),
            security_group=sg_bastion_host)

        #TODO: SHOULD restrict the IP range allowed for SSH access
        bastion_host.allow_ssh_access_from(aws_ec2.Peer.ipv4("0.0.0.0/0"))

        sg_use_es = aws_ec2.SecurityGroup(
            self,
            "ElasticSearchClientSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for an elasticsearch client',
            security_group_name='use-es-cluster-sg')
        cdk.Tags.of(sg_use_es).add('Name', 'use-es-cluster-sg')

        sg_es = aws_ec2.SecurityGroup(
            self,
            "ElasticSearchSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for an elasticsearch cluster',
            security_group_name='es-cluster-sg')
        cdk.Tags.of(sg_es).add('Name', 'es-cluster-sg')

        sg_es.add_ingress_rule(peer=sg_es,
                               connection=aws_ec2.Port.all_tcp(),
                               description='es-cluster-sg')
        sg_es.add_ingress_rule(peer=sg_use_es,
                               connection=aws_ec2.Port.all_tcp(),
                               description='use-es-cluster-sg')
        sg_es.add_ingress_rule(peer=sg_bastion_host,
                               connection=aws_ec2.Port.all_tcp(),
                               description='bastion-host-sg')

        #XXX: aws cdk elasticsearch example - https://github.com/aws/aws-cdk/issues/2873
        es_domain_name = 'es-hol'
        es_cfn_domain = aws_elasticsearch.CfnDomain(
            self,
            "ElasticSearch",
            elasticsearch_cluster_config={
                "dedicatedMasterCount": 3,
                "dedicatedMasterEnabled": True,
                "dedicatedMasterType": "t2.medium.elasticsearch",
                "instanceCount": 2,
                "instanceType": "t2.medium.elasticsearch",
                "zoneAwarenessEnabled": True
            },
            ebs_options={
                "ebsEnabled": True,
                "volumeSize": 10,
                "volumeType": "gp2"
            },
            domain_name=es_domain_name,
            elasticsearch_version="7.7",
            encryption_at_rest_options={"enabled": False},
            access_policies={
                "Version":
                "2012-10-17",
                "Statement": [{
                    "Effect":
                    "Allow",
                    "Principal": {
                        "AWS": "*"
                    },
                    "Action":
                    ["es:Describe*", "es:List*", "es:Get*", "es:ESHttp*"],
                    "Resource":
                    self.format_arn(
                        service="es",
                        resource="domain",
                        resource_name="{}/*".format(es_domain_name))
                }]
            },
            snapshot_options={"automatedSnapshotStartHour": 17},
            vpc_options={
                "securityGroupIds": [sg_es.security_group_id],
                "subnetIds":
                vpc.select_subnets(
                    subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT).subnet_ids
            })
        cdk.Tags.of(es_cfn_domain).add('Name', 'es-hol')

        cdk.CfnOutput(self,
                      'BastionHostId',
                      value=bastion_host.instance_id,
                      export_name='BastionHostId')
        cdk.CfnOutput(self,
                      'BastionHostPublicDNSName',
                      value=bastion_host.instance_public_dns_name,
                      export_name='BastionHostPublicDNSName')
        cdk.CfnOutput(self,
                      'ESDomainEndpoint',
                      value=es_cfn_domain.attr_domain_endpoint,
                      export_name='ESDomainEndpoint')
        cdk.CfnOutput(
            self,
            'ESDashboardsURL',
            value=f"{es_cfn_domain.attr_domain_endpoint}/_dashboards/",
            export_name='ESDashboardsURL')
Esempio n. 10
0
    def __init__(self, scope: cdk.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # Version of ParallelCluster for Cloud9.
        pcluster_version = cdk.CfnParameter(
            self,
            'ParallelClusterVersion',
            description=
            'Specify a custom parallelcluster version. See https://pypi.org/project/aws-parallelcluster/#history for options.',
            default='2.8.0',
            type='String',
            allowed_values=get_version_list('aws-parallelcluster'))

        # S3 URI for Config file
        config = cdk.CfnParameter(
            self,
            'ConfigS3URI',
            description='Set a custom parallelcluster config file.',
            default=
            'https://notearshpc-quickstart.s3.amazonaws.com/{0}/config.ini'.
            format(__version__))

        # Password
        password = cdk.CfnParameter(
            self,
            'UserPasswordParameter',
            description='Set a password for the hpc-quickstart user',
            no_echo=True)

        # create a VPC
        vpc = ec2.Vpc(
            self,
            'VPC',
            cidr='10.0.0.0/16',
            gateway_endpoints={
                "S3":
                ec2.GatewayVpcEndpointOptions(
                    service=ec2.GatewayVpcEndpointAwsService.S3),
                "DynamoDB":
                ec2.GatewayVpcEndpointOptions(
                    service=ec2.GatewayVpcEndpointAwsService.DYNAMODB)
            },
            max_azs=99)
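        # max_azs=99 effectively means "use every AZ available to this stack's
        # environment"; CDK caps it at the number of AZs actually available.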

        # the VPC above creates one public and one private subnet per AZ; select the private ones
        selection = vpc.select_subnets(subnet_type=ec2.SubnetType.PRIVATE)

        # Output created subnets
        for i, public_subnet in enumerate(vpc.public_subnets):
            cdk.CfnOutput(self,
                          'PublicSubnet%i' % i,
                          value=public_subnet.subnet_id)

        for i, private_subnet in enumerate(vpc.private_subnets):
            cdk.CfnOutput(self,
                          'PrivateSubnet%i' % i,
                          value=private_subnet.subnet_id)

        cdk.CfnOutput(self, 'VPCId', value=vpc.vpc_id)

        # Create a Bucket
        data_bucket = s3.Bucket(self, "DataRepository")
        cdk.CfnOutput(self, 'DataRespository', value=data_bucket.bucket_name)
        cloudtrail_bucket = s3.Bucket(self, "CloudTrailLogs")
        quickstart_bucket = s3.Bucket.from_bucket_name(self,
                                                       'QuickStartBucket',
                                                       'aws-quickstart')

        # Upload Bootstrap Script to that bucket
        bootstrap_script = assets.Asset(self,
                                        'BootstrapScript',
                                        path='scripts/bootstrap.sh')

        # Upload parallel cluster post_install_script to that bucket
        pcluster_post_install_script = assets.Asset(
            self,
            'PclusterPostInstallScript',
            path='scripts/post_install_script.sh')

        # Upload parallel cluster post_install_script to that bucket
        pcluster_config_script = assets.Asset(self,
                                              'PclusterConfigScript',
                                              path='scripts/config.ini')

        # Setup CloudTrail
        cloudtrail.Trail(self, 'CloudTrail', bucket=cloudtrail_bucket)

        # Create a Cloud9 instance
        # Cloud9 doesn't have the ability to provide userdata
        # Because of this we need to use SSM run command
        cloud9_instance = cloud9.Ec2Environment(
            self,
            'ResearchWorkspace',
            vpc=vpc,
            instance_type=ec2.InstanceType(
                instance_type_identifier='c5.large'))
        cdk.CfnOutput(self,
                      'Research Workspace URL',
                      value=cloud9_instance.ide_url)

        # Create a keypair in lambda and store the private key in SecretsManager
        c9_createkeypair_role = iam.Role(
            self,
            'Cloud9CreateKeypairRole',
            assumed_by=iam.ServicePrincipal('lambda.amazonaws.com'))
        c9_createkeypair_role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'service-role/AWSLambdaBasicExecutionRole'))
        # Add IAM permissions to the lambda role
        c9_createkeypair_role.add_to_policy(
            iam.PolicyStatement(
                actions=['ec2:CreateKeyPair', 'ec2:DeleteKeyPair'],
                resources=['*'],
            ))

        # Lambda for Cloud9 keypair
        c9_createkeypair_lambda = _lambda.Function(
            self,
            'C9CreateKeyPairLambda',
            runtime=_lambda.Runtime.PYTHON_3_6,
            handler='lambda_function.handler',
            timeout=cdk.Duration.seconds(300),
            role=c9_createkeypair_role,
            code=_lambda.Code.asset('functions/source/c9keypair'),
        )

        c9_createkeypair_provider = cr.Provider(
            self,
            "C9CreateKeyPairProvider",
            on_event_handler=c9_createkeypair_lambda)

        c9_createkeypair_cr = cfn.CustomResource(
            self,
            "C9CreateKeyPair",
            provider=c9_createkeypair_provider,
            properties={'ServiceToken': c9_createkeypair_lambda.function_arn})
        #c9_createkeypair_cr.node.add_dependency(instance_id)
        c9_ssh_private_key_secret = secretsmanager.CfnSecret(
            self,
            'SshPrivateKeySecret',
            secret_string=c9_createkeypair_cr.get_att_string('PrivateKey'))
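        # The keypair Lambda behind the custom resource above is expected to return
        # the generated private key via the 'PrivateKey' attribute, which is then
        # stored in Secrets Manager here.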

        # The iam policy has a <REGION> parameter that needs to be replaced.
        # We do it programmatically so future versions of the synth'd stack
        # template include all regions.
        with open('iam/ParallelClusterUserPolicy.json') as json_file:
            data = json.load(json_file)
            for s in data['Statement']:
                if s['Sid'] == 'S3ParallelClusterReadOnly':
                    s['Resource'] = []
                    for r in region_info.RegionInfo.regions:
                        s['Resource'].append(
                            'arn:aws:s3:::{0}-aws-parallelcluster*'.format(
                                r.name))

            parallelcluster_user_policy = iam.CfnManagedPolicy(
                self,
                'ParallelClusterUserPolicy',
                policy_document=iam.PolicyDocument.from_json(data))

        # Cloud9 IAM Role
        cloud9_role = iam.Role(
            self,
            'Cloud9Role',
            assumed_by=iam.ServicePrincipal('ec2.amazonaws.com'))
        cloud9_role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'AmazonSSMManagedInstanceCore'))
        cloud9_role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name('AWSCloud9User'))
        cloud9_role.add_managed_policy(
            iam.ManagedPolicy.from_managed_policy_arn(
                self, 'AttachParallelClusterUserPolicy',
                parallelcluster_user_policy.ref))
        cloud9_role.add_to_policy(
            iam.PolicyStatement(resources=['*'],
                                actions=[
                                    'ec2:DescribeInstances',
                                    'ec2:DescribeVolumes', 'ec2:ModifyVolume'
                                ]))
        cloud9_role.add_to_policy(
            iam.PolicyStatement(resources=[c9_ssh_private_key_secret.ref],
                                actions=['secretsmanager:GetSecretValue']))
        cloud9_role.add_to_policy(
            iam.PolicyStatement(
                actions=["s3:Get*", "s3:List*"],
                resources=[
                    "arn:aws:s3:::%s/*" % (data_bucket.bucket_name),
                    "arn:aws:s3:::%s" % (data_bucket.bucket_name)
                ]))

        bootstrap_script.grant_read(cloud9_role)
        pcluster_post_install_script.grant_read(cloud9_role)
        pcluster_config_script.grant_read(cloud9_role)

        # Admin Group
        admin_group = iam.Group(self, 'AdminGroup')
        admin_group.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'AdministratorAccess'))
        admin_group.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'AWSCloud9Administrator'))

        # PowerUser Group
        poweruser_group = iam.Group(self, 'PowerUserGroup')
        poweruser_group.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name('PowerUserAccess'))
        poweruser_group.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'AWSCloud9Administrator'))

        # HPC User
        user = iam.CfnUser(
            self,
            'Researcher',
            groups=[admin_group.node.default_child.ref],
            login_profile=iam.CfnUser.LoginProfileProperty(
                password_reset_required=True,
                password=cdk.SecretValue.cfn_parameter(password).to_string()))

        create_user = cdk.CfnParameter(self,
                                       "CreateUser",
                                       default="false",
                                       type="String",
                                       allowed_values=['true', 'false'
                                                       ]).value_as_string
        user_condition = cdk.CfnCondition(self,
                                          "UserCondition",
                                          expression=cdk.Fn.condition_equals(
                                              create_user, "true"))
        user.cfn_options.condition = user_condition
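        # The Researcher IAM user (and the outputs below) are only created when
        # the CreateUser parameter is set to 'true'.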

        cdk.CfnOutput(self,
                      'UserLoginUrl',
                      value="".join([
                          "https://", self.account,
                          ".signin.aws.amazon.com/console"
                      ]),
                      condition=user_condition)
        cdk.CfnOutput(self,
                      'UserName',
                      value=user.ref,
                      condition=user_condition)

        # Cloud9 Setup IAM Role
        cloud9_setup_role = iam.Role(
            self,
            'Cloud9SetupRole',
            assumed_by=iam.ServicePrincipal('lambda.amazonaws.com'))
        cloud9_setup_role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'service-role/AWSLambdaBasicExecutionRole'))
        # Allow pcluster to be run in bootstrap
        cloud9_setup_role.add_managed_policy(
            iam.ManagedPolicy.from_managed_policy_arn(
                self, 'AttachParallelClusterUserPolicySetup',
                parallelcluster_user_policy.ref))

        # Add IAM permissions to the lambda role
        cloud9_setup_role.add_to_policy(
            iam.PolicyStatement(
                actions=[
                    'cloudformation:DescribeStackResources',
                    'ec2:AssociateIamInstanceProfile',
                    'ec2:AuthorizeSecurityGroupIngress',
                    'ec2:DescribeInstances',
                    'ec2:DescribeInstanceStatus',
                    'ec2:DescribeInstanceAttribute',
                    'ec2:DescribeIamInstanceProfileAssociations',
                    'ec2:DescribeVolumes',
                    'ec2:DescribeVolumeAttribute',
                    'ec2:DescribeVolumesModifications',
                    'ec2:DescribeVolumeStatus',
                    'ssm:DescribeInstanceInformation',
                    'ec2:ModifyVolume',
                    'ec2:ReplaceIamInstanceProfileAssociation',
                    'ec2:ReportInstanceStatus',
                    'ssm:SendCommand',
                    'ssm:GetCommandInvocation',
                    's3:GetObject',
                    'lambda:AddPermission',
                    'lambda:RemovePermission',
                    'events:PutRule',
                    'events:DeleteRule',
                    'events:PutTargets',
                    'events:RemoveTargets',
                    'cloud9:CreateEnvironmentMembership',
                ],
                resources=['*'],
            ))

        cloud9_setup_role.add_to_policy(
            iam.PolicyStatement(actions=['iam:PassRole'],
                                resources=[cloud9_role.role_arn]))

        cloud9_setup_role.add_to_policy(
            iam.PolicyStatement(
                actions=['lambda:AddPermission', 'lambda:RemovePermission'],
                resources=['*']))

        # Cloud9 Instance Profile
        c9_instance_profile = iam.CfnInstanceProfile(
            self, "Cloud9InstanceProfile", roles=[cloud9_role.role_name])

        # Lambda to add Instance Profile to Cloud9
        c9_instance_profile_lambda = _lambda.Function(
            self,
            'C9InstanceProfileLambda',
            runtime=_lambda.Runtime.PYTHON_3_6,
            handler='lambda_function.handler',
            timeout=cdk.Duration.seconds(900),
            role=cloud9_setup_role,
            code=_lambda.Code.asset('functions/source/c9InstanceProfile'),
        )

        c9_instance_profile_provider = cr.Provider(
            self,
            "C9InstanceProfileProvider",
            on_event_handler=c9_instance_profile_lambda,
        )

        instance_id = cfn.CustomResource(self,
                                         "C9InstanceProfile",
                                         provider=c9_instance_profile_provider,
                                         properties={
                                             'InstanceProfile':
                                             c9_instance_profile.ref,
                                             'Cloud9Environment':
                                             cloud9_instance.environment_id,
                                         })
        instance_id.node.add_dependency(cloud9_instance)

        # Lambda for Cloud9 Bootstrap
        c9_bootstrap_lambda = _lambda.Function(
            self,
            'C9BootstrapLambda',
            runtime=_lambda.Runtime.PYTHON_3_6,
            handler='lambda_function.handler',
            timeout=cdk.Duration.seconds(900),
            role=cloud9_setup_role,
            code=_lambda.Code.asset('functions/source/c9bootstrap'),
        )

        c9_bootstrap_provider = cr.Provider(
            self, "C9BootstrapProvider", on_event_handler=c9_bootstrap_lambda)

        c9_bootstrap_cr = cfn.CustomResource(
            self,
            "C9Bootstrap",
            provider=c9_bootstrap_provider,
            properties={
                'Cloud9Environment':
                cloud9_instance.environment_id,
                'BootstrapPath':
                's3://%s/%s' % (bootstrap_script.s3_bucket_name,
                                bootstrap_script.s3_object_key),
                'Config':
                config.value_as_string,
                'VPCID':
                vpc.vpc_id,
                'MasterSubnetID':
                vpc.public_subnets[0].subnet_id,
                'ComputeSubnetID':
                vpc.private_subnets[0].subnet_id,
                'PostInstallScriptS3Url':
                "".join([
                    's3://', pcluster_post_install_script.s3_bucket_name, "/",
                    pcluster_post_install_script.s3_object_key
                ]),
                'PostInstallScriptBucket':
                pcluster_post_install_script.s3_bucket_name,
                'S3ReadWriteResource':
                data_bucket.bucket_arn,
                'S3ReadWriteUrl':
                's3://%s' % (data_bucket.bucket_name),
                'KeyPairId':
                c9_createkeypair_cr.ref,
                'KeyPairSecretArn':
                c9_ssh_private_key_secret.ref,
                'UserArn':
                user.attr_arn,
                'PclusterVersion':
                pcluster_version.value_as_string
            })
        c9_bootstrap_cr.node.add_dependency(instance_id)
        c9_bootstrap_cr.node.add_dependency(c9_createkeypair_cr)
        c9_bootstrap_cr.node.add_dependency(c9_ssh_private_key_secret)
        c9_bootstrap_cr.node.add_dependency(data_bucket)

        enable_budget = cdk.CfnParameter(self,
                                         "EnableBudget",
                                         default="true",
                                         type="String",
                                         allowed_values=['true', 'false'
                                                         ]).value_as_string
        # Budgets
        budget_properties = {
            'budgetType': "COST",
            'timeUnit': "ANNUALLY",
            'budgetLimit': {
                'amount':
                cdk.CfnParameter(
                    self,
                    'BudgetLimit',
                    description=
                    'The initial budget for this project in USD ($).',
                    default=2000,
                    type='Number').value_as_number,
                'unit':
                "USD",
            },
            'costFilters': None,
            'costTypes': {
                'includeCredit': False,
                'includeDiscount': True,
                'includeOtherSubscription': True,
                'includeRecurring': True,
                'includeRefund': True,
                'includeSubscription': True,
                'includeSupport': True,
                'includeTax': True,
                'includeUpfront': True,
                'useAmortized': False,
                'useBlended': False,
            },
            'plannedBudgetLimits': None,
            'timePeriod': None,
        }

        email = {
            'notification': {
                'comparisonOperator': "GREATER_THAN",
                'notificationType': "ACTUAL",
                'threshold': 80,
                'thresholdType': "PERCENTAGE",
            },
            'subscribers': [{
                'address':
                cdk.CfnParameter(
                    self,
                    'NotificationEmail',
                    description=
                    'This email address will receive billing alarm notifications when 80% of the budget limit is reached.',
                    default='*****@*****.**').value_as_string,
                'subscriptionType':
                "EMAIL",
            }]
        }

        overall_budget = budgets.CfnBudget(
            self,
            "HPCBudget",
            budget=budget_properties,
            notifications_with_subscribers=[email],
        )
        overall_budget.cfn_options.condition = cdk.CfnCondition(
            self,
            "BudgetCondition",
            expression=cdk.Fn.condition_equals(enable_budget, "true"))
Esempio n. 11
0
    def __init__(
        self,
        scope: core.Construct,
        id: str,
        cidr_block: str,
        platform_identifier: str = 'covariate-ingest',
        **kwargs
    ) -> None:
        super().__init__(scope, id, **kwargs)

        self.lambda_function_role_name = f'{platform_identifier}-lambda-function'
        self.node.set_context('lambda_function_role_name', self.lambda_function_role_name)

        self.batch_job_role_name = f'{platform_identifier}-batch-job'
        self.node.set_context('batch_job_role_name', self.batch_job_role_name)

        self.vpc = ec2.Vpc(
            self,
            "vpc",
            enable_dns_hostnames=True,
            enable_dns_support=True,
            flow_logs={
                "default":
                    ec2.FlowLogOptions(
                        destination=ec2.FlowLogDestination.to_cloud_watch_logs()
                    )
            },
            # max_azs=99,  # Means use all AZs
            max_azs=3,
            cidr=cidr_block,
            # configuration will create a subnet for each config, in each AZ.
            # So us-east-1 3 public, and 3 private
            subnet_configuration=[
                ec2.SubnetConfiguration(
                    name="Public",
                    cidr_mask=24,
                    subnet_type=ec2.SubnetType.PUBLIC,
                ),
                ec2.SubnetConfiguration(
                    subnet_type=ec2.SubnetType.PRIVATE,
                    name="Private",
                    cidr_mask=20
                )
            ],
            gateway_endpoints={
                "S3":
                    ec2.GatewayVpcEndpointOptions(
                        service=ec2.GatewayVpcEndpointAwsService.S3
                    )
            },
        )
        self.vpc.add_interface_endpoint(
            "EcrDockerEndpoint",
            service=ec2.InterfaceVpcEndpointAwsService.ECR_DOCKER
        )
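        # The ECR Docker interface endpoint lets Batch/ECS hosts in the private
        # subnets pull container images without routing through the NAT gateway
        # (fully private pulls typically also need the ECR API and S3 endpoints).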

        # Public NACL
        self.nacl_public = ec2.NetworkAcl(
            self,
            "nacl_public",
            vpc=self.vpc,
            subnet_selection=ec2.SubnetSelection(
                subnet_type=ec2.SubnetType.PUBLIC
            )
        )
        self.nacl_public.add_entry(
            "in-rule",
            rule_number=95,
            cidr=ec2.AclCidr.any_ipv4(),
            rule_action=ec2.Action.ALLOW,
            direction=ec2.TrafficDirection.INGRESS,
            traffic=ec2.AclTraffic.tcp_port_range(start_port=0, end_port=65535)
        )
        self.nacl_public.add_entry(
            "out-rule",
            rule_number=95,
            cidr=ec2.AclCidr.any_ipv4(),
            rule_action=ec2.Action.ALLOW,
            direction=ec2.TrafficDirection.EGRESS,
            traffic=ec2.AclTraffic.tcp_port_range(start_port=0, end_port=65535)
        )

        # Private NACL
        self.nacl_private = ec2.NetworkAcl(
            self,
            "nacl_private",
            vpc=self.vpc,
            subnet_selection=ec2.SubnetSelection(
                subnet_type=ec2.SubnetType.PRIVATE
            )
        )
        self.nacl_private.add_entry(
            "in-rule",
            rule_number=95,
            cidr=ec2.AclCidr.any_ipv4(),
            rule_action=ec2.Action.ALLOW,
            direction=ec2.TrafficDirection.INGRESS,
            traffic=ec2.AclTraffic.tcp_port_range(start_port=0, end_port=65432)
        )
        self.nacl_private.add_entry(
            "out-rule",
            rule_number=95,
            cidr=ec2.AclCidr.any_ipv4(),
            rule_action=ec2.Action.ALLOW,
            direction=ec2.TrafficDirection.EGRESS,
            traffic=ec2.AclTraffic.tcp_port_range(start_port=0, end_port=65432)
        )

        # Add Batch Compute Envs
        cpu_instances = [
            ec2.InstanceType('c5.large'),
            ec2.InstanceType('c5.xlarge'),
            ec2.InstanceType('c5.2xlarge'),
            ec2.InstanceType('c5.4xlarge'),
            ec2.InstanceType('m5.large'),
            ec2.InstanceType('m5.xlarge'),
            ec2.InstanceType('m5.2xlarge'),
            ec2.InstanceType('m5.4xlarge'),
        ]

        self.cpu_on_demand = batch.ComputeEnvironment(
            self,
            'batch-cpu-on-demand',
            managed=True,
            enabled=True,
            compute_resources=batch.ComputeResources(
                vpc=self.vpc,  # Will select only private subnets.
                type=batch.ComputeResourceType.ON_DEMAND,
                allocation_strategy=batch.AllocationStrategy.
                BEST_FIT_PROGRESSIVE,
                minv_cpus=0,
                maxv_cpus=640,
                desiredv_cpus=0,
                instance_types=cpu_instances,
                image=ecs.EcsOptimizedImage.amazon_linux2(
                    hardware_type=ecs.AmiHardwareType.STANDARD
                ),
            ),
        )

        self.cpu_spot = batch.ComputeEnvironment(
            self,
            'batch-cpu-spot',
            managed=True,
            enabled=True,
            compute_resources=batch.ComputeResources(
                vpc=self.vpc,  # Will select only private subnets.
                type=batch.ComputeResourceType.SPOT,
                allocation_strategy=batch.AllocationStrategy.
                SPOT_CAPACITY_OPTIMIZED,
                bid_percentage=80,
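                # Spot bids are capped at 80% of the On-Demand price for the
                # selected instance type.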
                minv_cpus=0,
                maxv_cpus=640,
                desiredv_cpus=0,
                instance_types=cpu_instances,
                image=ecs.EcsOptimizedImage.amazon_linux2(
                    hardware_type=ecs.AmiHardwareType.STANDARD
                ),
            ),
        )

        self.cpu_spot_first = batch.JobQueue(
            self,
            'cpu-spot-first',
            job_queue_name=f'{platform_identifier}-cpu-queue',
            compute_environments=[
                batch.JobQueueComputeEnvironment(
                    compute_environment=self.cpu_spot, order=1
                ),
                batch.JobQueueComputeEnvironment(
                    compute_environment=self.cpu_on_demand, order=2
                ),
            ],
            enabled=True,
            priority=10
        )

        self.lambda_function_role = iam.Role(
            self,
            'lambda-function-role',
            role_name=self.lambda_function_role_name,
            description='',
            assumed_by=iam.ServicePrincipal(service='lambda.amazonaws.com'),
        )
        

        self.batch_job_role = iam.Role(
            self,
            'batch-job-role',
            role_name=self.batch_job_role_name,
            description='',
            assumed_by=iam.ServicePrincipal(service='ecs-tasks.amazonaws.com'),
        )

        self.intermediate_bucket = s3.Bucket(
            self,
            f'{platform_identifier}-data-bucket',
            bucket_name=f'{platform_identifier}-data-dev',
            block_public_access=s3.BlockPublicAccess(
                block_public_acls=False,
                block_public_policy=False,
                ignore_public_acls=False,
                restrict_public_buckets=False
            ),
        )
        self.intermediate_bucket.grant_read_write(self.lambda_function_role)
        self.intermediate_bucket.grant_read_write(self.batch_job_role)

        cluster = ecs.Cluster(
            self, 
            "covar-api-cluster",
            cluster_name='covar-service-cluster',
            vpc=self.vpc
        )
Esempio n. 12
0
    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        OPENSEARCH_DOMAIN_NAME = cdk.CfnParameter(
            self,
            'OpenSearchDomainName',
            type='String',
            description='Amazon OpenSearch Service domain name',
            default='opensearch-{}'.format(''.join(
                random.sample((string.ascii_letters), k=5))),
            allowed_pattern='[a-z]+[A-Za-z0-9\-]+')

        EC2_KEY_PAIR_NAME = cdk.CfnParameter(
            self,
            'EC2KeyPairName',
            type='String',
            description='Amazon EC2 Instance KeyPair name')

        #XXX: For creating this stack's resources in an existing VPC,
        # uncomment the code below, comment out the vpc = aws_ec2.Vpc(..) code,
        # and then pass -c vpc_name=your-existing-vpc to the cdk command,
        # for example:
        # cdk -c vpc_name=your-existing-vpc synth
        #
        # vpc_name = self.node.try_get_context('vpc_name')
        # vpc = aws_ec2.Vpc.from_lookup(self, 'ExistingVPC',
        #   is_default=True,
        #   vpc_name=vpc_name
        # )

        vpc = aws_ec2.Vpc(
            self,
            "OpenSearchVPC",
            max_azs=3,
            gateway_endpoints={
                "S3":
                aws_ec2.GatewayVpcEndpointOptions(
                    service=aws_ec2.GatewayVpcEndpointAwsService.S3)
            })

        #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceClass.html
        #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceSize.html#aws_cdk.aws_ec2.InstanceSize
        ec2_instance_type = aws_ec2.InstanceType.of(
            aws_ec2.InstanceClass.BURSTABLE3, aws_ec2.InstanceSize.MEDIUM)

        sg_bastion_host = aws_ec2.SecurityGroup(
            self,
            "BastionHostSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for a bastion host',
            security_group_name='bastion-host-sg')
        cdk.Tags.of(sg_bastion_host).add('Name', 'bastion-host-sg')

        #TODO: SHOULD restrict the IP range allowed for SSH access
        sg_bastion_host.add_ingress_rule(peer=aws_ec2.Peer.ipv4("0.0.0.0/0"),
                                         connection=aws_ec2.Port.tcp(22),
                                         description='SSH access')

        bastion_host = aws_ec2.Instance(
            self,
            "BastionHost",
            vpc=vpc,
            instance_type=ec2_instance_type,
            machine_image=aws_ec2.MachineImage.latest_amazon_linux(),
            vpc_subnets=aws_ec2.SubnetSelection(
                subnet_type=aws_ec2.SubnetType.PUBLIC),
            security_group=sg_bastion_host,
            key_name=EC2_KEY_PAIR_NAME.value_as_string)

        sg_use_opensearch = aws_ec2.SecurityGroup(
            self,
            "OpenSearchClientSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for an opensearch client',
            security_group_name='use-opensearch-cluster-sg')
        cdk.Tags.of(sg_use_opensearch).add('Name', 'use-opensearch-cluster-sg')

        sg_opensearch_cluster = aws_ec2.SecurityGroup(
            self,
            "OpenSearchSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for an opensearch cluster',
            security_group_name='opensearch-cluster-sg')
        cdk.Tags.of(sg_opensearch_cluster).add('Name', 'opensearch-cluster-sg')

        sg_opensearch_cluster.add_ingress_rule(
            peer=sg_opensearch_cluster,
            connection=aws_ec2.Port.all_tcp(),
            description='opensearch-cluster-sg')

        sg_opensearch_cluster.add_ingress_rule(
            peer=sg_use_opensearch,
            connection=aws_ec2.Port.tcp(443),
            description='use-opensearch-cluster-sg')
        sg_opensearch_cluster.add_ingress_rule(
            peer=sg_use_opensearch,
            connection=aws_ec2.Port.tcp_range(9200, 9300),
            description='use-opensearch-cluster-sg')

        sg_opensearch_cluster.add_ingress_rule(
            peer=sg_bastion_host,
            connection=aws_ec2.Port.tcp(443),
            description='bastion-host-sg')
        sg_opensearch_cluster.add_ingress_rule(
            peer=sg_bastion_host,
            connection=aws_ec2.Port.tcp_range(9200, 9300),
            description='bastion-host-sg')

        master_user_secret = aws_secretsmanager.Secret(
            self,
            "OpenSearchMasterUserSecret",
            generate_secret_string=aws_secretsmanager.SecretStringGenerator(
                secret_string_template=json.dumps({"username": "******"}),
                generate_string_key="password",
                # Master password must be at least 8 characters long and contain at least one uppercase letter,
                # one lowercase letter, one number, and one special character.
                password_length=8))

        #XXX: aws cdk elasticsearch example - https://github.com/aws/aws-cdk/issues/2873
        # You should camelCase the property names instead of PascalCase
        opensearch_domain = aws_opensearchservice.Domain(
            self,
            "OpenSearch",
            domain_name=OPENSEARCH_DOMAIN_NAME.value_as_string,
            version=aws_opensearchservice.EngineVersion.OPENSEARCH_1_0,
            capacity={
                "master_nodes": 3,
                "master_node_instance_type": "r6g.large.search",
                "data_nodes": 3,
                "data_node_instance_type": "r6g.large.search"
            },
            ebs={
                "volume_size": 10,
                "volume_type": aws_ec2.EbsDeviceVolumeType.GP2
            },
            #XXX: az_count must be equal to vpc subnets count.
            zone_awareness={"availability_zone_count": 3},
            logging={
                "slow_search_log_enabled": True,
                "app_log_enabled": True,
                "slow_index_log_enabled": True
            },
            fine_grained_access_control=aws_opensearchservice.
            AdvancedSecurityOptions(
                master_user_name=master_user_secret.secret_value_from_json(
                    "username").to_string(),
                master_user_password=master_user_secret.secret_value_from_json(
                    "password")),
            # Enforce HTTPS is required when fine-grained access control is enabled.
            enforce_https=True,
            # Node-to-node encryption is required when fine-grained access control is enabled
            node_to_node_encryption=True,
            # Encryption-at-rest is required when fine-grained access control is enabled.
            encryption_at_rest={"enabled": True},
            use_unsigned_basic_auth=True,
            security_groups=[sg_opensearch_cluster],
            automated_snapshot_start_hour=17,  # 2 AM (GMT+9)
            vpc=vpc,
            vpc_subnets=[
                aws_ec2.SubnetSelection(
                    one_per_az=True,
                    subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT)
            ],
            removal_policy=cdk.RemovalPolicy.
            DESTROY  # default: cdk.RemovalPolicy.RETAIN
        )
        cdk.Tags.of(opensearch_domain).add(
            'Name', f'{OPENSEARCH_DOMAIN_NAME.value_as_string}')

        cdk.CfnOutput(self,
                      'BastionHostId',
                      value=bastion_host.instance_id,
                      export_name='BastionHostId')
        cdk.CfnOutput(self,
                      'OpenSearchDomainEndpoint',
                      value=opensearch_domain.domain_endpoint,
                      export_name='OpenSearchDomainEndpoint')
        cdk.CfnOutput(
            self,
            'OpenSearchDashboardsURL',
            value=f"{opensearch_domain.domain_endpoint}/_dashboards/",
            export_name='OpenSearchDashboardsURL')
        cdk.CfnOutput(self,
                      'MasterUserSecretId',
                      value=master_user_secret.secret_name,
                      export_name='MasterUserSecretId')
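
        # A minimal usage sketch (not part of the original stack): after deployment,
        # the generated master user credentials can be read back from Secrets Manager,
        # e.g. to sign in to the OpenSearch Dashboards URL exported above. The secret
        # name is the 'MasterUserSecretId' stack output; boto3 is assumed to be
        # available on the caller's side.
        #
        #   import json
        #   import boto3
        #
        #   def get_opensearch_master_user(secret_name: str) -> dict:
        #       client = boto3.client("secretsmanager")
        #       resp = client.get_secret_value(SecretId=secret_name)
        #       return json.loads(resp["SecretString"])  # {"username": "...", "password": "..."}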
Example n. 13
    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        #XXX: To create Amazon MWAA in an existing VPC,
        # uncomment the code below,
        # comment out the vpc = aws_ec2.Vpc(..) code,
        # and then pass -c vpc_name=your-existing-vpc to the cdk command,
        # for example,
        # cdk -c vpc_name=your-existing-vpc synth
        #
        # vpc_name = self.node.try_get_context('vpc_name')
        # vpc = aws_ec2.Vpc.from_lookup(self, 'ExistingVPC',
        #   is_default=True,
        #   vpc_name=vpc_name
        # )

        #XXX: To use more than 2 AZs, be sure to specify the account and region on your stack.
        #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/Vpc.html
        vpc = aws_ec2.Vpc(
            self,
            'MwaaStack',
            max_azs=2,
            gateway_endpoints={
                "S3":
                aws_ec2.GatewayVpcEndpointOptions(
                    service=aws_ec2.GatewayVpcEndpointAwsService.S3)
            })
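
        # A minimal sketch (assumption, not shown in the original snippet): to get more
        # than 2 AZs as noted above, the stack has to be synthesized with an explicit
        # account/region, e.g. in the CDK app entry point (the stack class name here is
        # assumed):
        #
        #   import os
        #   import aws_cdk as cdk
        #
        #   app = cdk.App()
        #   MwaaStack(app, "MwaaStack",
        #             env=cdk.Environment(account=os.getenv("CDK_DEFAULT_ACCOUNT"),
        #                                 region=os.getenv("CDK_DEFAULT_REGION")))
        #   app.synth()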

        s3_bucket_name = self.node.try_get_context('s3_bucket_for_dag_code')
        s3_bucket = s3.Bucket.from_bucket_name(self, "S3BucketForDAGCode",
                                               s3_bucket_name)

        DEFAULT_MWAA_ENV_NAME = 'MyAirflowEnv-{}'.format(''.join(
            random.sample((string.ascii_letters), k=5)))
        MY_MWAA_ENV_NAME = self.node.try_get_context('airflow_env_name')
        MY_MWAA_ENV_NAME = MY_MWAA_ENV_NAME if MY_MWAA_ENV_NAME else DEFAULT_MWAA_ENV_NAME
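
        # Both context values used above can be supplied on the command line (a usage
        # sketch; the bucket and environment names below are placeholders):
        #
        #   cdk deploy -c s3_bucket_for_dag_code=my-dag-code-bucket \
        #              -c airflow_env_name=MyAirflowEnv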

        sg_mwaa = aws_ec2.SecurityGroup(
            self,
            "AirflowSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for Amazon MWAA Environment {}'.format(
                MY_MWAA_ENV_NAME),
            security_group_name='airflow-sg-{}'.format(MY_MWAA_ENV_NAME))
        sg_mwaa.add_ingress_rule(peer=sg_mwaa,
                                 connection=aws_ec2.Port.all_traffic(),
                                 description='airflow security group')
        cdk.Tags.of(sg_mwaa).add('Name',
                                 'airflow-sg-{}'.format(MY_MWAA_ENV_NAME))

        mwaa_execution_policy_doc = aws_iam.PolicyDocument()
        mwaa_execution_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    # arn:{partition}:{service}:{region}:{account}:{resource}{sep}{resource-name}
                    "resources": [
                        self.format_arn(
                            service="airflow",
                            resource="environment",
                            resource_name=MY_MWAA_ENV_NAME,
                            arn_format=cdk.ArnFormat.SLASH_RESOURCE_NAME)
                    ],
                    "actions": ["airflow:PublishMetrics"]
                }))

        mwaa_execution_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.DENY,
                    "resources": [
                        s3_bucket.bucket_arn, "{}/*".format(
                            s3_bucket.bucket_arn)
                    ],
                    "actions": ["s3:ListAllMyBuckets"]
                }))

        mwaa_execution_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    "resources": [
                        s3_bucket.bucket_arn, "{}/*".format(
                            s3_bucket.bucket_arn)
                    ],
                    "actions": ["s3:GetObject*", "s3:GetBucket*", "s3:List*"]
                }))

        mwaa_execution_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    # arn:{partition}:{service}:{region}:{account}:{resource}{sep}{resource-name}
                    "resources": [
                        self.format_arn(
                            service="logs",
                            resource="log-group",
                            resource_name="airflow-{}-*".format(
                                MY_MWAA_ENV_NAME),
                            arn_format=cdk.ArnFormat.COLON_RESOURCE_NAME)
                    ],
                    "actions": [
                        "logs:CreateLogStream", "logs:CreateLogGroup",
                        "logs:PutLogEvents", "logs:GetLogEvents",
                        "logs:GetLogRecord", "logs:GetLogGroupFields",
                        "logs:GetQueryResults"
                    ]
                }))

        mwaa_execution_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect": aws_iam.Effect.ALLOW,
                    "resources": ["*"],
                    "actions": ["logs:DescribeLogGroups"]
                }))

        mwaa_execution_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect": aws_iam.Effect.ALLOW,
                    "resources": ["*"],
                    "actions": ["cloudwatch:PutMetricData"]
                }))

        mwaa_execution_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    # arn:{partition}:{service}:{region}:{account}:{resource}{sep}{resource-name}
                    "resources": [
                        self.format_arn(service="sqs",
                                        account="*",
                                        resource="airflow-celery-*")
                    ],
                    "actions": [
                        "sqs:ChangeMessageVisibility", "sqs:DeleteMessage",
                        "sqs:GetQueueAttributes", "sqs:GetQueueUrl",
                        "sqs:ReceiveMessage", "sqs:SendMessage"
                    ]
                }))

        mwaa_execution_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    "actions": [
                        "kms:Decrypt", "kms:DescribeKey",
                        "kms:GenerateDataKey*", "kms:Encrypt"
                    ],
                    "not_resources": [
                        self.format_arn(
                            service="kms",
                            region="*",
                            resource="key",
                            resource_name="*",
                            arn_format=cdk.ArnFormat.SLASH_RESOURCE_NAME)
                    ],
                    "conditions": {
                        "StringLike": {
                            "kms:ViaService": [
                                "sqs.{region}.amazonaws.com".format(
                                    region=kwargs['env'].region)
                            ]
                        }
                    }
                }))

        mwaa_execution_role = aws_iam.Role(
            self,
            'MWAAExecutionRole',
            role_name='AmazonMWAA-{name}-{suffix}'.format(
                name=MY_MWAA_ENV_NAME, suffix=str(kwargs['env'].account)[-5:]),
            assumed_by=aws_iam.ServicePrincipal('airflow.amazonaws.com'),
            path='/service-role/',
            inline_policies={
                'MWAA-Execution-Policy': mwaa_execution_policy_doc
            })

        #XXX: https://github.com/aws/aws-cdk/issues/3227
        mwaa_execution_role.assume_role_policy.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    "actions": ["sts:AssumeRole"],
                    "principals":
                    [aws_iam.ServicePrincipal('airflow-env.amazonaws.com')]
                }))

        #XXX: NetworkConfiguration.SubnetIds: expected maximum item count: 2
        MAX_SUBNET_IDS = 2
        mwaa_network_conf = mwaa.CfnEnvironment.NetworkConfigurationProperty(
            subnet_ids=vpc.select_subnets(
                subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT).
            subnet_ids[:MAX_SUBNET_IDS],
            security_group_ids=[sg_mwaa.security_group_id])

        mwaa_logging_conf = mwaa.CfnEnvironment.LoggingConfigurationProperty(
            dag_processing_logs=mwaa.CfnEnvironment.
            ModuleLoggingConfigurationProperty(enabled=True,
                                               log_level="WARNING"),
            scheduler_logs=mwaa.CfnEnvironment.
            ModuleLoggingConfigurationProperty(enabled=True,
                                               log_level="WARNING"),
            task_logs=mwaa.CfnEnvironment.ModuleLoggingConfigurationProperty(
                enabled=True, log_level="INFO"),
            webserver_logs=mwaa.CfnEnvironment.
            ModuleLoggingConfigurationProperty(enabled=True,
                                               log_level="WARNING"),
            worker_logs=mwaa.CfnEnvironment.ModuleLoggingConfigurationProperty(
                enabled=True, log_level="WARNING"))

        mwaa_conf_options = {
            "logging.logging_level": "INFO",
            "core.default_timezone": "utc"
        }

        airflow_env = mwaa.CfnEnvironment(
            self,
            "MyAirflow",
            name=MY_MWAA_ENV_NAME,
            airflow_configuration_options=mwaa_conf_options,
            airflow_version="2.0.2",  #XXX: Valid values=[2.0.2, 1.10.12]
            dag_s3_path="dags",
            environment_class=
            "mw1.small",  #XXX: Valid values=[mw1.small, mw1.medium, mw1.large]
            execution_role_arn=mwaa_execution_role.role_arn,
            logging_configuration=mwaa_logging_conf,
            max_workers=2,
            min_workers=1,
            network_configuration=mwaa_network_conf,
            requirements_s3_path="requirements/requirements.txt",
            source_bucket_arn=s3_bucket.bucket_arn,
            #tags={"env": "staging", "service": "airflow"}, #XXX: https://github.com/aws/aws-cdk/issues/13772
            webserver_access_mode="PUBLIC_ONLY",
            weekly_maintenance_window_start="SUN:03:30")
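
        # DAG code and the optional requirements file are read from the S3 bucket
        # referenced above; a usage sketch for getting them there (the paths follow the
        # dag_s3_path / requirements_s3_path values set on the environment):
        #
        #   aws s3 cp my_dag.py s3://<s3_bucket_for_dag_code>/dags/
        #   aws s3 cp requirements.txt s3://<s3_bucket_for_dag_code>/requirements/requirements.txt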

        cdk.CfnOutput(self,
                      'StackName',
                      value=self.stack_name,
                      export_name='StackName')
        cdk.CfnOutput(self, 'VpcId', value=vpc.vpc_id, export_name='VpcId')

        cdk.CfnOutput(self,
                      'AirflowEnvName',
                      value=airflow_env.name,
                      export_name='AirflowEnvName')
        cdk.CfnOutput(self,
                      'AirflowVersion',
                      value=airflow_env.airflow_version,
                      export_name='AirflowVersion')
        cdk.CfnOutput(self,
                      'AirflowSourceBucketArn',
                      value=airflow_env.source_bucket_arn,
                      export_name='AirflowSourceBucketArn')
        cdk.CfnOutput(self,
                      'AirflowDagS3Path',
                      value=airflow_env.dag_s3_path,
                      export_name='AirflowDagS3Path')
        cdk.CfnOutput(self,
                      'MWAAEnvironmentClass',
                      value=airflow_env.environment_class,
                      export_name='MWAAEnvironmentClass')
        cdk.CfnOutput(self,
                      'MWAASecurityGroupID',
                      value=sg_mwaa.security_group_id,
                      export_name='MWAASecurityGroupID')
        cdk.CfnOutput(self,
                      'MWAAExecutionRoleArn',
                      value=airflow_env.execution_role_arn,
                      export_name='MWAAExecutionRoleArn')
Example n. 14
    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        # vpc_name = self.node.try_get_context("vpc_name")
        # vpc = aws_ec2.Vpc.from_lookup(self, "ExistingVPC",
        #   is_default=True,
        #   vpc_name=vpc_name)
        vpc = aws_ec2.Vpc(
            self,
            "FirehoseToS3VPC",
            max_azs=2,
            gateway_endpoints={
                "S3":
                aws_ec2.GatewayVpcEndpointOptions(
                    service=aws_ec2.GatewayVpcEndpointAwsService.S3)
            })

        ASYNC_CALLEE_LAMBDA_FN_NAME = "LambdaAsyncCallee"
        async_callee_lambda_fn = aws_lambda.Function(
            self,
            "LambdaAsyncCallee",
            runtime=aws_lambda.Runtime.PYTHON_3_7,
            function_name="LambdaAsyncCallee",
            handler="lambda_aync_callee.lambda_handler",
            description=
            "Lambda function asynchronously invoked by LambdaAsyncCaller",
            code=aws_lambda.Code.from_asset(
                os.path.join(os.path.dirname(__file__), 'src/main/python')),
            timeout=cdk.Duration.minutes(5))

        log_group = aws_logs.LogGroup(
            self,
            "LambdaAsyncCalleeLogGroup",
            #XXX: Circular dependency between resources occurs
            # if aws_lambda.Function.function_name is used
            # instead of literal name of lambda function such as "LambdaAsyncCallee"
            log_group_name="/aws/lambda/{}".format(
                ASYNC_CALLEE_LAMBDA_FN_NAME),
            retention=aws_logs.RetentionDays.THREE_DAYS,
            removal_policy=cdk.RemovalPolicy.DESTROY)
        log_group.grant_write(async_callee_lambda_fn)

        event_bus = aws_events.EventBus(
            self,
            "EventBusForLambda",
            event_bus_name="EventBusForLambdaDestinations",
        )
        event_bus.apply_removal_policy(cdk.RemovalPolicy.DESTROY)

        log_group = aws_logs.LogGroup(
            self,
            "EventBusLogGroup",
            log_group_name="/aws/events/{}".format(event_bus.event_bus_name),
            retention=aws_logs.RetentionDays.THREE_DAYS,
            removal_policy=cdk.RemovalPolicy.DESTROY)

        event_rule = aws_events.Rule(
            self,
            "EventRuleForLambdaDestinations",
            rule_name="EventRuleForLambdaDestinations",
            event_bus=event_bus,
            event_pattern={"account": [self.account]})
        event_rule.add_target(aws_events_targets.CloudWatchLogGroup(log_group))
        event_rule.apply_removal_policy(cdk.RemovalPolicy.DESTROY)

        CALLER_LAMBDA_FN_NAME = "LambdaAsyncCaller"
        caller_lambda_fn = aws_lambda.Function(
            self,
            "LambdaAsyncCaller",
            runtime=aws_lambda.Runtime.PYTHON_3_7,
            function_name="LambdaAsyncCaller",
            handler="lambda_caller.lambda_handler",
            description="Asynchronusly call lambda function",
            code=aws_lambda.Code.from_asset(
                os.path.join(os.path.dirname(__file__), 'src/main/python')),
            timeout=cdk.Duration.minutes(5),
            #XXX: Uncomment if you want to use a different lambda function version
            # current_version_options=aws_lambda.VersionOptions(
            #   on_success=aws_lambda_destinations.LambdaDestination(async_callee_lambda_fn, response_only=False),
            #   on_failure=aws_lambda_destinations.EventBridgeDestination(event_bus),
            #   max_event_age=cdk.Duration.hours(6), # Minimum: 60 seconds, Maximum: 6 hours, Default: 6 hours
            #   retry_attempts=0 # Minimum: 0, Maximum: 2, Default: 2
            # ),
            on_success=aws_lambda_destinations.LambdaDestination(
                async_callee_lambda_fn, response_only=False),
            on_failure=aws_lambda_destinations.EventBridgeDestination(
                event_bus),
            max_event_age=cdk.Duration.hours(
                6),  # Minimum: 60 seconds, Maximum: 6 hours, Default: 6 hours
            #XXX: Set retry_attempts to 0 in order to trigger the failure destination as soon as an error occurs
            retry_attempts=0  # Minimum: 0, Maximum: 2, Default: 2
        )

        sns_topic = aws_sns.Topic(self,
                                  'SnsTopicForLambda',
                                  topic_name='LambdaSourceEvent',
                                  display_name='lambda source event')
        caller_lambda_fn.add_event_source(
            aws_lambda_event_sources.SnsEventSource(sns_topic))

        caller_lambda_fn.add_to_role_policy(
            aws_iam.PolicyStatement(
                effect=aws_iam.Effect.ALLOW,
                #XXX: The ARN will be formatted as follows:
                # arn:{partition}:{service}:{region}:{account}:{resource}{sep}{resource-name}
                resources=[
                    self.format_arn(
                        partition="aws",
                        service="lambda",
                        region=cdk.Aws.REGION,
                        account=cdk.Aws.ACCOUNT_ID,
                        resource="function",
                        resource_name="{}*".format(
                            async_callee_lambda_fn.function_name),
                        arn_format=cdk.ArnFormat.COLON_RESOURCE_NAME)
                ],
                actions=["lambda:InvokeFunction"]))

        caller_lambda_fn.add_to_role_policy(
            aws_iam.PolicyStatement(
                effect=aws_iam.Effect.ALLOW,
                #XXX: The ARN will be formatted as follows:
                # arn:{partition}:{service}:{region}:{account}:{resource}{sep}{resource-name}
                resources=[event_bus.event_bus_arn],
                actions=["events:PutEvents"]))

        log_group = aws_logs.LogGroup(
            self,
            "LambdaAsyncCallerLogGroup",
            #XXX: Circular dependency between resources occurs
            # if aws_lambda.Function.function_name is used
            # instead of literal name of lambda function such as "LambdaAsyncCaller"
            log_group_name="/aws/lambda/{}".format(CALLER_LAMBDA_FN_NAME),
            retention=aws_logs.RetentionDays.THREE_DAYS,
            removal_policy=cdk.RemovalPolicy.DESTROY)
        log_group.grant_write(caller_lambda_fn)

        cdk.CfnOutput(self,
                      'SNSTopicName',
                      value=sns_topic.topic_name,
                      export_name='SNSTopicName')
        cdk.CfnOutput(self,
                      'SNSTopicArn',
                      value=sns_topic.topic_arn,
                      export_name='SNSTopicArn')
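
        # A test sketch (assumption, not part of the original snippet): publishing any
        # message to the SNS topic exported above invokes LambdaAsyncCaller
        # asynchronously, so the on_success / on_failure destinations configured
        # earlier take effect.
        #
        #   import boto3
        #
        #   sns = boto3.client("sns")
        #   sns.publish(TopicArn="<SNSTopicArn output>", Message='{"hello": "world"}')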
Example n. 15
    def __init__(self,
                 scope: core.Construct,
                 id: str,
                 stage={},
                 **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # parameters from context
        customer = self.node.try_get_context("customer")
        namestage = stage['name_stage']
        vpccidr = stage['vpc_cidr']  # VPC CIDR
        vpcname = "vpc-" + customer + "-" + namestage
        subnetprefix = int(stage['subnet_prefix'])  # Subnets Prefix /XX
        maxazs = int(stage['max_azs'])  # Number of AZs
        layers = stage['layers']  # Names for Layers
        layerendpoints = stage['layer_endpoints']  # Layer name for Endpoints
        layersnat = stage['layer_nats']  # Layer name for the Nat Subnets
        activeflowlogs = stage['active_flowlogs']  # Enable flow logs?
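
        # The 'stage' dict is expected to carry the keys read above; a sample shape
        # (values are illustrative assumptions, e.g. supplied via cdk.json context):
        #
        #   stage = {
        #       "name_stage": "dev",
        #       "vpc_cidr": "10.0.0.0/16",
        #       "subnet_prefix": "24",
        #       "max_azs": "2",
        #       "layers": {"frontend": "PUBLIC", "app": "PRIVATE", "data": "ISOLATED"},
        #       "layer_endpoints": "app",
        #       "layer_nats": "frontend",
        #       "active_flowlogs": "true",
        #   }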

        # flags subnets types
        flg_public = False
        flg_private = False
        flg_isolated = False

        # TODO: HANDLE ERROR for insufficient space for layers in the VPC CIDR space
        nro_subnets = len(layers.keys()) * maxazs
        subnets = list(ip.ip_network(vpccidr).subnets(new_prefix=subnetprefix))

        # subnets configuration - layers * azs
        subnetsconfs = []
        for layer in layers:
            layertype = layers[layer]
            if layertype == 'PUBLIC':
                sntype = _ec2.SubnetType.PUBLIC
                flg_public = True
            if layertype == 'PRIVATE':
                sntype = _ec2.SubnetType.PRIVATE
                flg_private = True
            if layertype == 'ISOLATED':
                flg_isolated = True
                sntype = _ec2.SubnetType.ISOLATED
            subnetsconfs.append(
                _ec2.SubnetConfiguration(name=layer,
                                         subnet_type=sntype,
                                         cidr_mask=subnetprefix))

        # select subnets for NAT gateways
        natsubnets = None
        if layersnat in layers and layers[layersnat] == 'PUBLIC':
            natsubnets = _ec2.SubnetSelection(subnet_group_name=layersnat)

        # vpc tenancy
        vpctenacy = _ec2.DefaultInstanceTenancy.DEFAULT
        if self.node.try_get_context("vpc_tenacy") == 'DEDICATED':
            vpctenacy = _ec2.DefaultInstanceTenancy.DEDICATED

        # vpc creation
        sn_layer_endpoints = [
            _ec2.SubnetSelection(one_per_az=True,
                                 subnet_group_name=layerendpoints)
        ]

        vpc = _ec2.Vpc(self,
                       vpcname,
                       max_azs=maxazs,
                       cidr=vpccidr,
                       subnet_configuration=subnetsconfs,
                       nat_gateway_subnets=natsubnets,
                       default_instance_tenancy=vpctenacy,
                       gateway_endpoints={
                           "S3":
                           _ec2.GatewayVpcEndpointOptions(
                               service=_ec2.GatewayVpcEndpointAwsService.S3,
                               subnets=sn_layer_endpoints)
                       })

        # Config Route Tables
        # TODO: create RT by subnets type

        publicsubnets = vpc.select_subnets(
            subnet_type=_ec2.SubnetType.PUBLIC) if flg_public else ""
        privatesubnets = vpc.select_subnets(
            subnet_type=_ec2.SubnetType.PRIVATE) if flg_private else ""
        isolatedsubnets = vpc.select_subnets(
            subnet_type=_ec2.SubnetType.ISOLATED) if flg_isolated else ""

        # Endpoints
        # s3 Endpoint
        print(layerendpoints)
        sn_layer_endpoints = _ec2.SubnetSelection(
            one_per_az=True, subnet_group_name=layerendpoints)
        #vpc.add_s3_endpoint(vpcname+"-S3Endpoint",subnets=sn_layer_endpoints)
        #vpc.add_gateway_endpoint(vpcname + "-S3Endpoint", service=_ec2.GatewayVpcEndpointAwsService.S3,
        #                         subnets=sn_layer_endpoints)

        # ec2 endpoint
        ec2_endpoint = vpc.add_interface_endpoint(
            vpcname + "-ec2_endpoint",
            service=_ec2.InterfaceVpcEndpointAwsService.E_C2,
            subnets=sn_layer_endpoints)
        ec2_endpoint.connections.allow_from_any_ipv4(
            port_range=_ec2.Port(from_port=443,
                                 to_port=443,
                                 protocol=_ec2.Protocol.TCP,
                                 string_representation="https"))

        # ec2 messages endpoint
        ec2messages_endpoint = vpc.add_interface_endpoint(
            vpcname + "-ec2message_endpoint",
            service=_ec2.InterfaceVpcEndpointAwsService.E_C2_MESSAGES,
            subnets=sn_layer_endpoints)
        ec2messages_endpoint.connections.allow_from_any_ipv4(
            port_range=_ec2.Port(from_port=443,
                                 to_port=443,
                                 protocol=_ec2.Protocol.TCP,
                                 string_representation="https"))

        # ssm endpoint
        ssm_endpoint = vpc.add_interface_endpoint(
            vpcname + "-ssm_endpoint",
            service=_ec2.InterfaceVpcEndpointAwsService.SSM,
            subnets=sn_layer_endpoints)
        ssm_endpoint.connections.allow_from_any_ipv4(
            port_range=_ec2.Port(from_port=443,
                                 to_port=443,
                                 protocol=_ec2.Protocol.TCP,
                                 string_representation="https"))

        # ssm messages endpoint
        ssmmessages_endpoint = vpc.add_interface_endpoint(
            vpcname + "-ssmmessages_endpoint",
            service=_ec2.InterfaceVpcEndpointAwsService.SSM_MESSAGES,
            subnets=sn_layer_endpoints)
        ssmmessages_endpoint.connections.allow_from_any_ipv4(
            port_range=_ec2.Port(from_port=443,
                                 to_port=443,
                                 protocol=_ec2.Protocol.TCP,
                                 string_representation="https"))

        # SSM IAM Role
        ec2_ssm_iam_role = iam.Role(
            self,
            "ssm_ec2_iam_role",
            assumed_by=iam.ServicePrincipal('ec2.amazonaws.com'),
            role_name="ssm_ec2_iam_role_" + stage['name_stage'])
        ec2_ssm_iam_role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'service-role/AmazonEC2RoleforSSM'))
        ec2_ssm_iam_role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'CloudWatchAgentServerPolicy'))
        #add Role to instance profile
        iam.CfnInstanceProfile(
            self,
            "instance_profile",
            roles=["ssm_ec2_iam_role_" + stage['name_stage']],
            instance_profile_name="ssm_ec2_iam_role_" + stage['name_stage'])

        # add policies to the IAM role
        ec2_ssm_iam_role.add_to_policy(
            iam.PolicyStatement(resources=[
                "arn:aws:s3:::aws-ssm-" + self.region + "/*",
                "arn:aws:s3:::aws-windows-downloads-" + self.region + "/*",
                "arn:aws:s3:::amazon-ssm-" + self.region + "/*",
                "arn:aws:s3:::amazon-ssm-packages-" + self.region + "/*",
                "arn:aws:s3:::" + self.region + "-birdwatcher-prod/*",
                "arn:aws:s3:::patch-baseline-snapshot-" + self.region + "/*"
            ],
                                actions=["s3:GetObject"]))

        ec2_ssm_iam_role.add_to_policy(
            iam.PolicyStatement(resources=["*"],
                                actions=[
                                    "ssmmessages:CreateControlChannel",
                                    "ssmmessages:CreateDataChannel",
                                    "ssmmessages:OpenControlChannel",
                                    "ssmmessages:OpenDataChannel",
                                    "s3:GetEncryptionConfiguration",
                                    "kms:Decrypt"
                                ]))
Example n. 16
    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        # vpc_name = self.node.try_get_context("vpc_name")
        # vpc = aws_ec2.Vpc.from_lookup(self, "ExistingVPC",
        #   is_default=True,
        #   vpc_name=vpc_name)
        vpc = aws_ec2.Vpc(
            self,
            "FirehoseToS3VPC",
            max_azs=2,
            gateway_endpoints={
                "S3":
                aws_ec2.GatewayVpcEndpointOptions(
                    service=aws_ec2.GatewayVpcEndpointAwsService.S3)
            })

        S3_BUCKET_SUFFIX = ''.join(
            random.sample((string.ascii_lowercase + string.digits), k=7))
        s3_bucket = s3.Bucket(
            self,
            "s3bucket",
            removal_policy=cdk.RemovalPolicy.
            DESTROY,  #XXX: Default: cdk.RemovalPolicy.RETAIN - The bucket will be orphaned
            bucket_name="firehose-to-s3-{region}-{suffix}".format(
                region=cdk.Aws.REGION, suffix=S3_BUCKET_SUFFIX))

        FIREHOSE_STREAM_NAME = cdk.CfnParameter(
            self,
            'FirehoseStreamName',
            type='String',
            description='kinesis data firehose stream name',
            default='PUT-S3-{}'.format(''.join(
                random.sample((string.ascii_letters), k=5))))

        FIREHOSE_BUFFER_SIZE = cdk.CfnParameter(
            self,
            'FirehoseBufferSize',
            type='Number',
            description='kinesis data firehose buffer size',
            min_value=1,
            max_value=128,
            default=128)

        FIREHOSE_BUFFER_INTERVAL = cdk.CfnParameter(
            self,
            'FirehoseBufferInterval',
            type='Number',
            description='kinesis data firehose buffer interval',
            min_value=60,
            max_value=300,
            default=60)

        FIREHOSE_LAMBDA_BUFFER_SIZE = cdk.CfnParameter(
            self,
            'FirehoseLambdaBufferSize',
            type='Number',
            description=
            'kinesis data firehose buffer size for AWS Lambda to transform records',
            min_value=1,
            max_value=3,
            default=3)

        FIREHOSE_LAMBDA_BUFFER_INTERVAL = cdk.CfnParameter(
            self,
            'FirehoseLambdaBufferInterval',
            type='Number',
            description=
            'kinesis data firehose buffer interval for AWS Lambda to transform records',
            min_value=60,
            max_value=900,
            default=300)

        FIREHOSE_LAMBDA_NUMBER_OF_RETRIES = cdk.CfnParameter(
            self,
            'FirehoseLambdaNumberOfRetries',
            type='Number',
            description=
            'Number of retries for AWS Lambda to transform records in kinesis data firehose',
            min_value=1,
            max_value=5,
            default=3)

        FIREHOSE_TO_S3_PREFIX = cdk.CfnParameter(
            self,
            'FirehosePrefix',
            type='String',
            description='kinesis data firehose S3 prefix')

        FIREHOSE_TO_S3_ERROR_OUTPUT_PREFIX = cdk.CfnParameter(
            self,
            'FirehoseErrorOutputPrefix',
            type='String',
            description='kinesis data firehose S3 error output prefix',
            default=
            'error/year=!{timestamp:yyyy}/month=!{timestamp:MM}/day=!{timestamp:dd}/hour=!{timestamp:HH}/!{firehose:error-output-type}'
        )

        METADATA_EXTRACT_LAMBDA_FN_NAME = "MetadataExtractor"
        metadata_extract_lambda_fn = aws_lambda.Function(
            self,
            "MetadataExtractor",
            runtime=aws_lambda.Runtime.PYTHON_3_7,
            function_name="MetadataExtractor",
            handler="metadata_extractor.lambda_handler",
            description="Extract partition keys from records",
            code=aws_lambda.Code.from_asset(
                os.path.join(os.path.dirname(__file__), 'src/main/python')),
            timeout=cdk.Duration.minutes(5))

        log_group = aws_logs.LogGroup(
            self,
            "MetadataExtractorLogGroup",
            #XXX: Circular dependency between resources occurs
            # if aws_lambda.Function.function_name is used
            # instead of literal name of lambda function such as "MetadataExtractor"
            log_group_name="/aws/lambda/{}".format(
                METADATA_EXTRACT_LAMBDA_FN_NAME),
            retention=aws_logs.RetentionDays.THREE_DAYS,
            removal_policy=cdk.RemovalPolicy.DESTROY)
        log_group.grant_write(metadata_extract_lambda_fn)
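
        # A minimal sketch (assumption) of what src/main/python/metadata_extractor.py
        # might look like: a standard Firehose transformation handler that decodes each
        # record and returns partition keys under metadata.partitionKeys for dynamic
        # partitioning. The 'year'/'month' keys are illustrative only.
        #
        #   import base64
        #   import json
        #
        #   def lambda_handler(event, context):
        #       output = []
        #       for record in event['records']:
        #           payload = json.loads(base64.b64decode(record['data']))
        #           output.append({
        #               'recordId': record['recordId'],
        #               'result': 'Ok',
        #               'data': record['data'],
        #               'metadata': {
        #                   'partitionKeys': {
        #                       'year': str(payload.get('year', 'unknown')),
        #                       'month': str(payload.get('month', 'unknown'))
        #                   }
        #               }
        #           })
        #       return {'records': output}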

        firehose_role_policy_doc = aws_iam.PolicyDocument()

        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    "resources": [
                        s3_bucket.bucket_arn, "{}/*".format(
                            s3_bucket.bucket_arn)
                    ],
                    "actions": [
                        "s3:AbortMultipartUpload", "s3:GetBucketLocation",
                        "s3:GetObject", "s3:ListBucket",
                        "s3:ListBucketMultipartUploads", "s3:PutObject"
                    ]
                }))

        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                effect=aws_iam.Effect.ALLOW,
                resources=["*"],
                actions=[
                    "ec2:DescribeVpcs", "ec2:DescribeVpcAttribute",
                    "ec2:DescribeSubnets", "ec2:DescribeSecurityGroups",
                    "ec2:DescribeNetworkInterfaces",
                    "ec2:CreateNetworkInterface",
                    "ec2:CreateNetworkInterfacePermission",
                    "ec2:DeleteNetworkInterface"
                ]))

        #XXX: https://docs.aws.amazon.com/ko_kr/cdk/latest/guide/tokens.html
        # String-encoded tokens:
        #  Avoid manipulating the string in other ways. For example,
        #  taking a substring of a string is likely to break the string token.
        firehose_log_group_name = f"/aws/kinesisfirehose/{FIREHOSE_STREAM_NAME.value_as_string}"
        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                effect=aws_iam.Effect.ALLOW,
                #XXX: The ARN will be formatted as follows:
                # arn:{partition}:{service}:{region}:{account}:{resource}{sep}{resource-name}
                resources=[
                    self.format_arn(
                        service="logs",
                        resource="log-group",
                        resource_name="{}:log-stream:*".format(
                            firehose_log_group_name),
                        arn_format=cdk.ArnFormat.COLON_RESOURCE_NAME)
                ],
                actions=["logs:PutLogEvents"]))

        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    #XXX: The ARN will be formatted as follows:
                    # arn:{partition}:{service}:{region}:{account}:{resource}{sep}{resource-name}
                    "resources": [
                        self.format_arn(
                            partition="aws",
                            service="lambda",
                            region=cdk.Aws.REGION,
                            account=cdk.Aws.ACCOUNT_ID,
                            resource="function",
                            resource_name="{}:*".format(
                                metadata_extract_lambda_fn.function_name),
                            arn_format=cdk.ArnFormat.COLON_RESOURCE_NAME)
                    ],
                    "actions": [
                        "lambda:InvokeFunction",
                        "lambda:GetFunctionConfiguration"
                    ]
                }))

        firehose_role = aws_iam.Role(
            self,
            "KinesisFirehoseServiceRole",
            role_name="KinesisFirehoseServiceRole-{stream_name}-{region}".
            format(stream_name=FIREHOSE_STREAM_NAME.value_as_string,
                   region=cdk.Aws.REGION),
            assumed_by=aws_iam.ServicePrincipal("firehose.amazonaws.com"),
            path='/service-role/',
            #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
            inline_policies={"firehose_role_policy": firehose_role_policy_doc})

        lambda_proc = cfn.ProcessorProperty(
            type="Lambda",
            parameters=[
                cfn.ProcessorParameterProperty(
                    parameter_name="LambdaArn",
                    parameter_value='{}:{}'.format(
                        metadata_extract_lambda_fn.function_arn,
                        metadata_extract_lambda_fn.current_version.version)),
                cfn.ProcessorParameterProperty(
                    parameter_name="NumberOfRetries",
                    parameter_value=FIREHOSE_LAMBDA_NUMBER_OF_RETRIES.
                    value_as_string),
                cfn.ProcessorParameterProperty(
                    parameter_name="RoleArn",
                    parameter_value=firehose_role.role_arn),
                cfn.ProcessorParameterProperty(
                    parameter_name="BufferSizeInMBs",
                    parameter_value=FIREHOSE_LAMBDA_BUFFER_SIZE.value_as_string
                ),
                cfn.ProcessorParameterProperty(
                    parameter_name="BufferIntervalInSeconds",
                    parameter_value=FIREHOSE_LAMBDA_BUFFER_INTERVAL.
                    value_as_string)
            ])

        record_deaggregation_proc = cfn.ProcessorProperty(
            type="RecordDeAggregation",
            parameters=[
                cfn.ProcessorParameterProperty(parameter_name="SubRecordType",
                                               parameter_value="JSON")
            ])

        #XXX: Adding a new line delimiter when delivering data to S3
        # This is also particularly useful when dynamic partitioning is applied to aggregated data
        # because multirecord deaggregation (which must be applied to aggregated data
        # before it can be dynamically partitioned) removes new lines from records as part of the parsing process.
        # https://docs.aws.amazon.com/firehose/latest/dev/dynamic-partitioning.html#dynamic-partitioning-new-line-delimiter
        append_delim_to_record_proc = cfn.ProcessorProperty(
            type="AppendDelimiterToRecord", parameters=[])

        firehose_processing_config = cfn.ProcessingConfigurationProperty(
            enabled=True,
            processors=[
                record_deaggregation_proc, append_delim_to_record_proc,
                lambda_proc
            ])

        ext_s3_dest_config = cfn.ExtendedS3DestinationConfigurationProperty(
            bucket_arn=s3_bucket.bucket_arn,
            role_arn=firehose_role.role_arn,
            buffering_hints={
                "intervalInSeconds": FIREHOSE_BUFFER_INTERVAL.value_as_number,
                "sizeInMBs": FIREHOSE_BUFFER_SIZE.value_as_number
            },
            cloud_watch_logging_options={
                "enabled": True,
                "logGroupName": firehose_log_group_name,
                "logStreamName": "DestinationDelivery"
            },
            compression_format=
            "UNCOMPRESSED",  # [GZIP | HADOOP_SNAPPY | Snappy | UNCOMPRESSED | ZIP]
            data_format_conversion_configuration={"enabled": False},
            dynamic_partitioning_configuration={
                "enabled": True,
                "retryOptions": {
                    "durationInSeconds": 300
                }
            },
            error_output_prefix=FIREHOSE_TO_S3_ERROR_OUTPUT_PREFIX.
            value_as_string,
            prefix=FIREHOSE_TO_S3_PREFIX.value_as_string,
            processing_configuration=firehose_processing_config)

        firehose_to_s3_delivery_stream = aws_kinesisfirehose.CfnDeliveryStream(
            self,
            "FirehoseToS3",
            delivery_stream_name=FIREHOSE_STREAM_NAME.value_as_string,
            delivery_stream_type="DirectPut",
            extended_s3_destination_configuration=ext_s3_dest_config,
            tags=[{
                "key": "Name",
                "value": FIREHOSE_STREAM_NAME.value_as_string
            }])

        cdk.CfnOutput(self,
                      'StackName',
                      value=self.stack_name,
                      export_name='StackName')
        cdk.CfnOutput(self,
                      '{}_S3DestBucket'.format(self.stack_name),
                      value=s3_bucket.bucket_name,
                      export_name='S3DestBucket')
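
        # Deployment sketch (values are placeholders): the CloudFormation parameters
        # defined above can be set at deploy time; with Lambda-based dynamic
        # partitioning the S3 prefix typically references the partition keys emitted by
        # the processor via the partitionKeyFromLambda namespace (the key names below
        # are assumptions matching the sketch of the metadata extractor).
        #
        #   cdk deploy \
        #     --parameters FirehosePrefix='data/year=!{partitionKeyFromLambda:year}/month=!{partitionKeyFromLambda:month}/' \
        #     --parameters FirehoseStreamName=PUT-S3-demo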
Example n. 17
    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        OPENSEARCH_DOMAIN_NAME = cdk.CfnParameter(
            self,
            'OpenSearchDomainName',
            type='String',
            description='Amazon OpenSearch Service domain name',
            default='opensearch-{}'.format(''.join(
                random.sample((string.ascii_letters), k=5))),
            allowed_pattern='[a-z]+[A-Za-z0-9\-]+')

        OPENSEARCH_INDEX_NAME = cdk.CfnParameter(
            self,
            'SearchIndexName',
            type='String',
            description='Amazon OpenSearch Service index name')

        EC2_KEY_PAIR_NAME = cdk.CfnParameter(
            self,
            'EC2KeyPairName',
            type='String',
            description='Amazon EC2 Instance KeyPair name')

        # vpc_name = self.node.try_get_context("vpc_name")
        # vpc = aws_ec2.Vpc.from_lookup(self, "ExistingVPC",
        #   is_default=True,
        #   vpc_name=vpc_name)
        #
        vpc = aws_ec2.Vpc(
            self,
            "EKKStackVPC",
            max_azs=3,
            gateway_endpoints={
                "S3":
                aws_ec2.GatewayVpcEndpointOptions(
                    service=aws_ec2.GatewayVpcEndpointAwsService.S3)
            })

        #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceClass.html
        #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceSize.html#aws_cdk.aws_ec2.InstanceSize
        ec2_instance_type = aws_ec2.InstanceType.of(
            aws_ec2.InstanceClass.BURSTABLE3, aws_ec2.InstanceSize.MEDIUM)

        sg_bastion_host = aws_ec2.SecurityGroup(
            self,
            "BastionHostSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for a bastion host',
            security_group_name='bastion-host-sg')
        cdk.Tags.of(sg_bastion_host).add('Name', 'bastion-host-sg')

        #TODO: SHOULD restrict the IP range allowed for SSH access
        sg_bastion_host.add_ingress_rule(peer=aws_ec2.Peer.ipv4("0.0.0.0/0"),
                                         connection=aws_ec2.Port.tcp(22),
                                         description='SSH access')

        bastion_host = aws_ec2.Instance(
            self,
            "BastionHost",
            vpc=vpc,
            instance_type=ec2_instance_type,
            machine_image=aws_ec2.MachineImage.latest_amazon_linux(),
            vpc_subnets=aws_ec2.SubnetSelection(
                subnet_type=aws_ec2.SubnetType.PUBLIC),
            security_group=sg_bastion_host,
            key_name=EC2_KEY_PAIR_NAME.value_as_string)
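
        # Access sketch (assumption, a common pattern with this kind of setup): since
        # the OpenSearch domain created below lives in private subnets, it can be
        # reached from a workstation through an SSH tunnel over the bastion host, e.g.:
        #
        #   ssh -i <EC2KeyPairName>.pem -N \
        #       -L 9200:<OpenSearchDomainEndpoint>:443 \
        #       ec2-user@<bastion-host-public-ip>
        #
        # after which https://localhost:9200/_dashboards/ serves the dashboards locally.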

        sg_use_opensearch = aws_ec2.SecurityGroup(
            self,
            "OpenSearchClientSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for an opensearch client',
            security_group_name='use-opensearch-cluster-sg')
        cdk.Tags.of(sg_use_opensearch).add('Name', 'use-opensearch-cluster-sg')

        sg_opensearch_cluster = aws_ec2.SecurityGroup(
            self,
            "OpenSearchSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for an opensearch cluster',
            security_group_name='opensearch-cluster-sg')
        cdk.Tags.of(sg_opensearch_cluster).add('Name', 'opensearch-cluster-sg')

        sg_opensearch_cluster.add_ingress_rule(
            peer=sg_opensearch_cluster,
            connection=aws_ec2.Port.all_tcp(),
            description='opensearch-cluster-sg')

        sg_opensearch_cluster.add_ingress_rule(
            peer=sg_use_opensearch,
            connection=aws_ec2.Port.tcp(443),
            description='use-opensearch-cluster-sg')
        sg_opensearch_cluster.add_ingress_rule(
            peer=sg_use_opensearch,
            connection=aws_ec2.Port.tcp_range(9200, 9300),
            description='use-opensearch-cluster-sg')

        sg_opensearch_cluster.add_ingress_rule(
            peer=sg_bastion_host,
            connection=aws_ec2.Port.tcp(443),
            description='bastion-host-sg')
        sg_opensearch_cluster.add_ingress_rule(
            peer=sg_bastion_host,
            connection=aws_ec2.Port.tcp_range(9200, 9300),
            description='bastion-host-sg')

        master_user_secret = aws_secretsmanager.Secret(
            self,
            "OpenSearchMasterUserSecret",
            generate_secret_string=aws_secretsmanager.SecretStringGenerator(
                secret_string_template=json.dumps({"username": "******"}),
                generate_string_key="password",
                # Master password must be at least 8 characters long and contain at least one uppercase letter,
                # one lowercase letter, one number, and one special character.
                password_length=8))

        #XXX: aws cdk elasticsearch example - https://github.com/aws/aws-cdk/issues/2873
        # You should camelCase the property names instead of PascalCase
        opensearch_domain = aws_opensearchservice.Domain(
            self,
            "OpenSearch",
            domain_name=OPENSEARCH_DOMAIN_NAME.value_as_string,
            version=aws_opensearchservice.EngineVersion.OPENSEARCH_1_0,
            #XXX: You cannot use graviton instances with non-graviton instances.
            # Use graviton instances as data nodes or use non-graviton instances as master nodes.
            capacity={
                "master_nodes": 3,
                "master_node_instance_type": "r6g.large.search",
                "data_nodes": 3,
                "data_node_instance_type": "r6g.large.search"
            },
            ebs={
                "volume_size": 10,
                "volume_type": aws_ec2.EbsDeviceVolumeType.GP2
            },
            #XXX: az_count must be equal to vpc subnets count.
            zone_awareness={"availability_zone_count": 3},
            logging={
                "slow_search_log_enabled": True,
                "app_log_enabled": True,
                "slow_index_log_enabled": True
            },
            fine_grained_access_control=aws_opensearchservice.
            AdvancedSecurityOptions(
                master_user_name=master_user_secret.secret_value_from_json(
                    "username").to_string(),
                master_user_password=master_user_secret.secret_value_from_json(
                    "password")),
            # Enforce HTTPS is required when fine-grained access control is enabled.
            enforce_https=True,
            # Node-to-node encryption is required when fine-grained access control is enabled
            node_to_node_encryption=True,
            # Encryption-at-rest is required when fine-grained access control is enabled.
            encryption_at_rest={"enabled": True},
            use_unsigned_basic_auth=True,
            security_groups=[sg_opensearch_cluster],
            automated_snapshot_start_hour=17,  # 2 AM (GMT+9)
            vpc=vpc,
            vpc_subnets=[
                aws_ec2.SubnetSelection(
                    one_per_az=True,
                    subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT)
            ],
            removal_policy=cdk.RemovalPolicy.
            DESTROY  # default: cdk.RemovalPolicy.RETAIN
        )
        cdk.Tags.of(opensearch_domain).add(
            'Name', f'{OPENSEARCH_DOMAIN_NAME.value_as_string}')

        S3_BUCKET_SUFFIX = ''.join(
            random.sample((string.ascii_lowercase + string.digits), k=7))
        s3_bucket = s3.Bucket(
            self,
            "s3bucket",
            removal_policy=cdk.RemovalPolicy.
            DESTROY,  #XXX: Default: cdk.RemovalPolicy.RETAIN - The bucket will be orphaned
            bucket_name="opskk-stack-{region}-{suffix}".format(
                region=cdk.Aws.REGION, suffix=S3_BUCKET_SUFFIX))

        firehose_role_policy_doc = aws_iam.PolicyDocument()
        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    "resources": [
                        s3_bucket.bucket_arn, "{}/*".format(
                            s3_bucket.bucket_arn)
                    ],
                    "actions": [
                        "s3:AbortMultipartUpload", "s3:GetBucketLocation",
                        "s3:GetObject", "s3:ListBucket",
                        "s3:ListBucketMultipartUploads", "s3:PutObject"
                    ]
                }))

        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                effect=aws_iam.Effect.ALLOW,
                resources=["*"],
                actions=[
                    "ec2:DescribeVpcs", "ec2:DescribeVpcAttribute",
                    "ec2:DescribeSubnets", "ec2:DescribeSecurityGroups",
                    "ec2:DescribeNetworkInterfaces",
                    "ec2:CreateNetworkInterface",
                    "ec2:CreateNetworkInterfacePermission",
                    "ec2:DeleteNetworkInterface"
                ]))

        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                effect=aws_iam.Effect.ALLOW,
                resources=[
                    opensearch_domain.domain_arn,
                    "{}/*".format(opensearch_domain.domain_arn)
                ],
                actions=[
                    "es:DescribeElasticsearchDomain",
                    "es:DescribeElasticsearchDomains",
                    "es:DescribeElasticsearchDomainConfig", "es:ESHttpPost",
                    "es:ESHttpPut"
                ]))

        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                effect=aws_iam.Effect.ALLOW,
                #XXX: https://aws.amazon.com/premiumsupport/knowledge-center/kinesis-data-firehose-delivery-failure/
                resources=[
                    opensearch_domain.domain_arn,
                    f"{opensearch_domain.domain_arn}/_all/_settings",
                    f"{opensearch_domain.domain_arn}/_cluster/stats",
                    f"{opensearch_domain.domain_arn}/{OPENSEARCH_INDEX_NAME.value_as_string}*/_mapping/%FIREHOSE_POLICY_TEMPLATE_PLACEHOLDER%",
                    f"{opensearch_domain.domain_arn}/_nodes",
                    f"{opensearch_domain.domain_arn}/_nodes/stats",
                    f"{opensearch_domain.domain_arn}/_nodes/*/stats",
                    f"{opensearch_domain.domain_arn}/_stats",
                    f"{opensearch_domain.domain_arn}/{OPENSEARCH_INDEX_NAME.value_as_string}*/_stats"
                ],
                actions=["es:ESHttpGet"]))

        firehose_log_group_name = f"/aws/kinesisfirehose/{OPENSEARCH_INDEX_NAME.value_as_string}"
        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                effect=aws_iam.Effect.ALLOW,
                #XXX: The ARN will be formatted as follows:
                # arn:{partition}:{service}:{region}:{account}:{resource}{sep}{resource-name}
                resources=[
                    self.format_arn(
                        service="logs",
                        resource="log-group",
                        resource_name="{}:log-stream:*".format(
                            firehose_log_group_name),
                        arn_format=cdk.ArnFormat.COLON_RESOURCE_NAME)
                ],
                actions=["logs:PutLogEvents"]))

        firehose_role = aws_iam.Role(
            self,
            "KinesisFirehoseServiceRole",
            role_name=
            f"KinesisFirehoseServiceRole-{OPENSEARCH_INDEX_NAME.value_as_string}-{cdk.Aws.REGION}",
            assumed_by=aws_iam.ServicePrincipal("firehose.amazonaws.com"),
            #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
            inline_policies={"firehose_role_policy": firehose_role_policy_doc})

        opensearch_dest_vpc_config = aws_kinesisfirehose.CfnDeliveryStream.VpcConfigurationProperty(
            role_arn=firehose_role.role_arn,
            security_group_ids=[sg_use_opensearch.security_group_id],
            subnet_ids=vpc.select_subnets(
                subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT).subnet_ids)

        opensearch_dest_config = aws_kinesisfirehose.CfnDeliveryStream.ElasticsearchDestinationConfigurationProperty(
            index_name=OPENSEARCH_INDEX_NAME.value_as_string,
            role_arn=firehose_role.role_arn,
            s3_configuration={
                "bucketArn": s3_bucket.bucket_arn,
                "bufferingHints": {
                    "intervalInSeconds": 60,
                    "sizeInMBs": 1
                },
                "cloudWatchLoggingOptions": {
                    "enabled": True,
                    "logGroupName": firehose_log_group_name,
                    "logStreamName": "S3Backup"
                },
                "compressionFormat":
                "UNCOMPRESSED",  # [GZIP | HADOOP_SNAPPY | Snappy | UNCOMPRESSED | ZIP]
                # Kinesis Data Firehose automatically appends the “YYYY/MM/dd/HH/” UTC prefix to delivered S3 files. You can also specify
                # an extra prefix in front of the time format and add "/" to the end to have it appear as a folder in the S3 console.
                "prefix": f"{OPENSEARCH_INDEX_NAME.value_as_string}/",
                "roleArn": firehose_role.role_arn
            },
            buffering_hints={
                "intervalInSeconds": 60,
                "sizeInMBs": 1
            },
            cloud_watch_logging_options={
                "enabled": True,
                "logGroupName": firehose_log_group_name,
                "logStreamName": "ElasticsearchDelivery"
            },
            domain_arn=opensearch_domain.domain_arn,
            index_rotation_period=
            "NoRotation",  # [NoRotation | OneDay | OneHour | OneMonth | OneWeek]
            retry_options={"durationInSeconds": 60},
            s3_backup_mode=
            "FailedDocumentsOnly",  # [AllDocuments | FailedDocumentsOnly]
            vpc_configuration=opensearch_dest_vpc_config)

        firehose_to_ops_delivery_stream = aws_kinesisfirehose.CfnDeliveryStream(
            self,
            "KinesisFirehoseToES",
            delivery_stream_name=OPENSEARCH_INDEX_NAME.value_as_string,
            delivery_stream_type="DirectPut",
            elasticsearch_destination_configuration=opensearch_dest_config,
            tags=[{
                "key": "Name",
                "value": OPENSEARCH_INDEX_NAME.value_as_string
            }])

        cdk.CfnOutput(self,
                      'BastionHostId',
                      value=bastion_host.instance_id,
                      export_name='BastionHostId')
        cdk.CfnOutput(self,
                      'OpenSearchDomainEndpoint',
                      value=opensearch_domain.domain_endpoint,
                      export_name='OpenSearchDomainEndpoint')
        cdk.CfnOutput(
            self,
            'OpenSearchDashboardsURL',
            value=f"{opensearch_domain.domain_endpoint}/_dashboards/",
            export_name='OpenSearchDashboardsURL')
        cdk.CfnOutput(self,
                      'MasterUserSecretId',
                      value=master_user_secret.secret_name,
                      export_name='MasterUserSecretId')
        cdk.CfnOutput(self,
                      '{}_S3DestBucket'.format(self.stack_name),
                      value=s3_bucket.bucket_name,
                      export_name='S3DestBucket')
        cdk.CfnOutput(self,
                      'FirehoseRoleArn',
                      value=firehose_role.role_arn,
                      export_name='FirehoseRoleArn')
    def __init__(self,
                 scope: core.Construct,
                 id: str,
                 stage={},
                 **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        customer = self.node.try_get_context("customer")
        stage_name = stage["stage_name"]
        vpc_cidr = stage["vpc_cidr"]
        vpc_prefix = stage["vpc_prefix"]
        prefix_name = f'{vpc_prefix}-{stage_name}-{customer}'
        subnet_prefix = int(stage['subnet_prefix'])
        max_azs = int(stage['max_azs'])
        nat_number = int(stage['nat_number'])
        layers = stage['layers']
        layer_endpoints = stage['layer_endpoints']
        layers_nat = stage['layer_nats']

        flag_public = False
        flag_private = False
        flag_isolated = False

        subnets_config = []
        for layer in layers:
            layer_type = layers[layer]
            if layer_type == 'PUBLIC':
                subnet_type = _ec2.SubnetType.PUBLIC
                flag_public = True
            if layer_type == 'PRIVATE':
                subnet_type = _ec2.SubnetType.PRIVATE
                flag_private = True
            if layer_type == 'ISOLATED':
                flag_isolated = True
                subnet_type = _ec2.SubnetType.ISOLATED
            subnets_config.append(
                _ec2.SubnetConfiguration(name=layer,
                                         subnet_type=subnet_type,
                                         cidr_mask=subnet_prefix))

        nat_subnets = None
        if layers_nat in layers and layers[layers_nat] == 'PUBLIC':
            nat_subnets = _ec2.SubnetSelection(subnet_group_name=layers_nat)

        vpc_tenancy = _ec2.DefaultInstanceTenancy.DEFAULT
        if self.node.try_get_context("vpc_tenancy") == 'DEDICATED':
            vpc_tenancy = _ec2.DefaultInstanceTenancy.DEDICATED

        subnet_layer_endpoints = [
            _ec2.SubnetSelection(one_per_az=True,
                                 subnet_group_name=layer_endpoints)
        ]

        self.vpc = _ec2.Vpc(
            self,
            prefix_name,
            max_azs=max_azs,
            cidr=vpc_cidr,
            subnet_configuration=subnets_config,
            nat_gateway_subnets=nat_subnets,
            nat_gateways=nat_number,
            default_instance_tenancy=vpc_tenancy,
            gateway_endpoints={
                "S3":
                _ec2.GatewayVpcEndpointOptions(
                    service=_ec2.GatewayVpcEndpointAwsService.S3,
                    subnets=subnet_layer_endpoints)
            })

        # tagging
        core.Tags.of(self.vpc.node.default_child).add("Name",
                                                      f'{prefix_name}-vpc')
        core.Tags.of(self.vpc.node.find_child('IGW')).add(
            "Name", f'{prefix_name}-igw')

        prisub = [prs for prs in self.vpc.private_subnets]
        pubsub = [pus for pus in self.vpc.public_subnets]
        isosub = [ios for ios in self.vpc.isolated_subnets]

        count = 1
        for nat in range(nat_number):
            core.Tags.of(
                self.vpc.node.find_child('publicSubnet' + str(count)).node.
                find_child('NATGateway')).add("Name", f'{prefix_name}-nat')
            core.Tags.of(
                self.vpc.node.find_child(
                    'publicSubnet' + str(count)).node.find_child("EIP")).add(
                        "Name", f'{prefix_name}-public-eip-{count}')
            count += 1

        count = 1
        for prs in prisub:
            az_end = prs.availability_zone[-2:]
            core.Tags.of(prs.node.default_child).add(
                "Name", f'{prefix_name}-private-{az_end}')
            core.Tags.of(
                self.vpc.node.find_child(
                    'privateSubnet' +
                    str(count)).node.find_child('RouteTable')).add(
                        "Name", f'{prefix_name}-private-rt-{az_end}')
            count += 1

        count = 1
        for pus in pubsub:
            az_end = pus.availability_zone[-2:]
            core.Tags.of(pus.node.default_child).add(
                "Name", f'{prefix_name}-public-{az_end}')
            core.Tags.of(
                self.vpc.node.find_child(
                    'publicSubnet' +
                    str(count)).node.find_child('RouteTable')).add(
                        "Name", f'{prefix_name}-public-rt-{az_end}')
            count += 1

        count = 1
        for ios in isosub:
            az_end = ios.availability_zone[-2:]
            core.Tags.of(ios.node.default_child).add(
                "Name", f'{prefix_name}-database-{az_end}')
            core.Tags.of(
                self.vpc.node.find_child(
                    'databaseSubnet' +
                    str(count)).node.find_child('RouteTable')).add(
                        "Name", f'{prefix_name}-database-rt-{az_end}')
            count += 1

        core.CfnOutput(self, "Output", value=self.vpc.vpc_id)
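# A sketch of the "stage" dictionary this VPC stack expects (for instance from
# cdk.json context or an app-level config file). The layer names below
# ("public", "private", "database") are illustrative, but they line up with the
# child-construct lookups ("publicSubnet1", "privateSubnet1", "databaseSubnet1")
# used for tagging above.
example_stage = {
    "stage_name": "dev",
    "vpc_cidr": "10.0.0.0/16",
    "vpc_prefix": "acme",
    "subnet_prefix": "24",        # CIDR mask applied to every subnet group
    "max_azs": "2",
    "nat_number": "1",
    "layers": {                   # subnet group name -> subnet type
        "public": "PUBLIC",
        "private": "PRIVATE",
        "database": "ISOLATED",
    },
    "layer_endpoints": "private",  # group that receives the S3 gateway endpoint
    "layer_nats": "public",        # public group that hosts the NAT gateways
}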
Example n. 19
  def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
    super().__init__(scope, construct_id, **kwargs)

    #XXX: To create this stack in an existing VPC,
    # uncomment the code below, comment out the vpc = aws_ec2.Vpc(..) code,
    # and pass -c vpc_name=your-existing-vpc to the cdk command,
    # for example:
    # cdk -c vpc_name=your-existing-vpc synth
    #
    # vpc_name = self.node.try_get_context('vpc_name')
    # vpc = aws_ec2.Vpc.from_lookup(self, 'ExistingVPC',
    #   is_default=True,
    #   vpc_name=vpc_name
    # )

    vpc = aws_ec2.Vpc(self, "ApiGatewayDynamoDBVPC",
      max_azs=2,
      gateway_endpoints={
        "S3": aws_ec2.GatewayVpcEndpointOptions(
          service=aws_ec2.GatewayVpcEndpointAwsService.S3
        ),
        "DynamoDB": aws_ec2.GatewayVpcEndpointOptions(
          service=aws_ec2.GatewayVpcEndpointAwsService.DYNAMODB
        )
      }
    )

    DDB_TABLE_SUFFIX = ''.join(random.sample((string.ascii_lowercase + string.digits), k=7))
    DDB_TABLE_NAME = "Comments-{}".format(DDB_TABLE_SUFFIX)

    ddb_table = aws_dynamodb.Table(self, "DynamoDbTable",
      table_name=DDB_TABLE_NAME,
      removal_policy=cdk.RemovalPolicy.DESTROY,
      partition_key=aws_dynamodb.Attribute(name="commentId",
        type=aws_dynamodb.AttributeType.STRING),
      time_to_live_attribute="ttl",
      billing_mode=aws_dynamodb.BillingMode.PROVISIONED,
      read_capacity=15,
      write_capacity=5,
    )

    ddb_table.add_global_secondary_index(
      read_capacity=15,
      write_capacity=5,
      index_name="pageId-index",
      partition_key=aws_dynamodb.Attribute(name='pageId', type=aws_dynamodb.AttributeType.STRING),
      projection_type=aws_dynamodb.ProjectionType.ALL
    )

    user_pool = aws_cognito.UserPool(self, 'UserPool',
      user_pool_name='UserPoolForApiGateway',
      removal_policy=cdk.RemovalPolicy.DESTROY,
      self_sign_up_enabled=True,
      sign_in_aliases={'email': True},
      auto_verify={'email': True},
      password_policy={
        'min_length': 8,
        'require_lowercase': False,
        'require_digits': False,
        'require_uppercase': False,
        'require_symbols': False,
      },
      account_recovery=aws_cognito.AccountRecovery.EMAIL_ONLY
    )

    user_pool_client = aws_cognito.UserPoolClient(self, 'UserPoolClient',
      user_pool=user_pool,
      auth_flows={
        'admin_user_password': True,
        'user_password': True,
        'custom': True,
        'user_srp': True
      },
      supported_identity_providers=[aws_cognito.UserPoolClientIdentityProvider.COGNITO]
    )

    auth = aws_apigateway.CognitoUserPoolsAuthorizer(self, 'AuthorizerForDynamoDBApi',
      cognito_user_pools=[user_pool]
    )

    ddb_access_policy_doc = aws_iam.PolicyDocument()
    ddb_access_policy_doc.add_statements(aws_iam.PolicyStatement(**{
      "effect": aws_iam.Effect.ALLOW,
      "resources": [ddb_table.table_arn],
      "actions": [
        "dynamodb:DeleteItem",
        "dynamodb:PartiQLInsert",
        "dynamodb:UpdateTimeToLive",
        "dynamodb:BatchWriteItem",
        "dynamodb:PutItem",
        "dynamodb:PartiQLUpdate",
        "dynamodb:UpdateItem",
        "dynamodb:PartiQLDelete"
      ]
    }))

    apigw_dynamodb_role = aws_iam.Role(self, "ApiGatewayRoleForDynamoDB",
      role_name='APIGatewayRoleForDynamoDB',
      assumed_by=aws_iam.ServicePrincipal('apigateway.amazonaws.com'),
      inline_policies={
        'DynamoDBAccessPolicy': ddb_access_policy_doc
      },
      managed_policies=[
        aws_iam.ManagedPolicy.from_aws_managed_policy_name('AmazonDynamoDBReadOnlyAccess'),
      ]
    )

    dynamodb_api = aws_apigateway.RestApi(self, "DynamoDBProxyAPI",
      rest_api_name="comments-api",
      description="An Amazon API Gateway REST API that integrated with an Amazon DynamoDB.",
      endpoint_types=[aws_apigateway.EndpointType.REGIONAL],
      default_cors_preflight_options={
        "allow_origins": aws_apigateway.Cors.ALL_ORIGINS
      },
      deploy=True,
      deploy_options=aws_apigateway.StageOptions(stage_name="v1"),
      endpoint_export_name="DynamoDBProxyAPIEndpoint"
    )

    all_resources = dynamodb_api.root.add_resource("comments")
    one_resource = all_resources.add_resource("{pageId}")

    apigw_error_responses = [
      aws_apigateway.IntegrationResponse(status_code="400", selection_pattern=r"4\d{2}"),
      aws_apigateway.IntegrationResponse(status_code="500", selection_pattern=r"5\d{2}")
    ]

    apigw_ok_responses = [
      aws_apigateway.IntegrationResponse(
        status_code="200"
      )
    ]

    ddb_put_item_options = aws_apigateway.IntegrationOptions(
      credentials_role=apigw_dynamodb_role,
      integration_responses=[*apigw_ok_responses, *apigw_error_responses],
      request_templates={
        'application/json': json.dumps({
          "TableName": DDB_TABLE_NAME,
          "Item": {
            "commentId": {
              "S": "$context.requestId"
            },
            "pageId": {
              "S": "$input.path('$.pageId')"
            },
            "userName": {
              "S": "$input.path('$.userName')"
            },
            "message": {
              "S": "$input.path('$.message')"
            }
          }
        }, indent=2)
      },
      passthrough_behavior=aws_apigateway.PassthroughBehavior.WHEN_NO_TEMPLATES
    )
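    # Illustrative note (the sample values are hypothetical): with the request
    # template above, a POST body such as
    #   {"pageId": "page-001", "userName": "alice", "message": "hello"}
    # is rewritten by API Gateway into a DynamoDB PutItem call whose item
    # attributes commentId (taken from $context.requestId), pageId, userName and
    # message are all stored as string (S) values in the Comments-* table.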

    create_integration = aws_apigateway.AwsIntegration(
      service='dynamodb',
      action='PutItem',
      integration_http_method='POST',
      options=ddb_put_item_options
    )

    method_responses = [
      aws_apigateway.MethodResponse(status_code='200'),
      aws_apigateway.MethodResponse(status_code='400'),
      aws_apigateway.MethodResponse(status_code='500')
    ]

    all_resources.add_method('POST', create_integration,
      method_responses=method_responses,
      authorization_type=aws_apigateway.AuthorizationType.COGNITO,
      authorizer=auth
    )

    get_response_templates = '''
#set($inputRoot = $input.path('$'))
{
  "comments": [
    #foreach($elem in $inputRoot.Items) {
       "commentId": "$elem.commentId.S",
       "userName": "******",
       "message": "$elem.message.S"
     }#if($foreach.hasNext),#end
    #end
  ]
}'''
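    # Illustrative note (the sample values are hypothetical): given a DynamoDB
    # Query result like
    #   {"Items": [{"commentId": {"S": "abc"}, "pageId": {"S": "page-001"},
    #               "userName": {"S": "alice"}, "message": {"S": "hi"}}]}
    # the VTL template above flattens it into
    #   {"comments": [{"commentId": "abc", "userName": "alice", "message": "hi"}]}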

    ddb_query_item_options = aws_apigateway.IntegrationOptions(
      credentials_role=apigw_dynamodb_role,
      integration_responses=[
        aws_apigateway.IntegrationResponse(
          status_code="200",
          response_templates={
            'application/json': get_response_templates
          }
        ),
        *apigw_error_responses
      ],
      request_templates={
        'application/json': json.dumps({
          "TableName": DDB_TABLE_NAME,
          "IndexName": "pageId-index",
          "KeyConditionExpression": "pageId = :v1",
          "ExpressionAttributeValues": {
            ":v1": {
              "S": "$input.params('pageId')"
            }
          }
        }, indent=2)
      },
      passthrough_behavior=aws_apigateway.PassthroughBehavior.WHEN_NO_TEMPLATES
    )

    get_integration = aws_apigateway.AwsIntegration(
      service='dynamodb',
      action='Query',
      integration_http_method='POST',
      options=ddb_query_item_options
    )

    one_resource.add_method('GET', get_integration,
      method_responses=method_responses,
      authorization_type=aws_apigateway.AuthorizationType.COGNITO,
      authorizer=auth
    )

    cdk.CfnOutput(self, 'DynamoDBTableName', value=ddb_table.table_name)
    cdk.CfnOutput(self, 'UserPoolId', value=user_pool.user_pool_id)
    cdk.CfnOutput(self, 'UserPoolClientId', value=user_pool_client.user_pool_client_id)
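# --- Usage sketch (not part of the stack above) ---
# A rough, end-to-end way to exercise the API once deployed: register and
# confirm a Cognito user, obtain an ID token via USER_PASSWORD_AUTH (enabled on
# the user pool client above), then call the comments API. The URL, pool id,
# client id and credentials below are placeholders to be replaced with the
# stack outputs and your own test values.
import json
import urllib.request

import boto3

API_BASE_URL = "https://xxxxxxxxxx.execute-api.us-east-1.amazonaws.com/v1"
USER_POOL_ID = "us-east-1_XXXXXXXXX"
USER_POOL_CLIENT_ID = "xxxxxxxxxxxxxxxxxxxxxxxxxx"

cognito = boto3.client("cognito-idp")

# 1) Register a test user and confirm it (admin confirmation keeps the sketch short).
cognito.sign_up(ClientId=USER_POOL_CLIENT_ID,
                Username="alice@example.com",
                Password="Passw0rd!")
cognito.admin_confirm_sign_up(UserPoolId=USER_POOL_ID,
                              Username="alice@example.com")

# 2) Authenticate and grab the ID token that the Cognito authorizer expects.
auth = cognito.initiate_auth(
    ClientId=USER_POOL_CLIENT_ID,
    AuthFlow="USER_PASSWORD_AUTH",
    AuthParameters={"USERNAME": "alice@example.com", "PASSWORD": "Passw0rd!"})
id_token = auth["AuthenticationResult"]["IdToken"]

# 3) Create a comment (POST /comments) and read comments back (GET /comments/{pageId}).
post_req = urllib.request.Request(
    f"{API_BASE_URL}/comments",
    data=json.dumps({"pageId": "page-001",
                     "userName": "alice",
                     "message": "hello"}).encode("utf-8"),
    headers={"Authorization": id_token, "Content-Type": "application/json"},
    method="POST")
print(urllib.request.urlopen(post_req).read().decode("utf-8"))

get_req = urllib.request.Request(f"{API_BASE_URL}/comments/page-001",
                                 headers={"Authorization": id_token})
print(urllib.request.urlopen(get_req).read().decode("utf-8"))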
Example n. 20
    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        # vpc_name = self.node.try_get_context("vpc_name")
        # vpc = aws_ec2.Vpc.from_lookup(self, "ExistingVPC",
        #   is_default=True,
        #   vpc_name=vpc_name)
        vpc = aws_ec2.Vpc(
            self,
            "EKKStackVPC",
            max_azs=2,
            gateway_endpoints={
                "S3":
                aws_ec2.GatewayVpcEndpointOptions(
                    service=aws_ec2.GatewayVpcEndpointAwsService.S3)
            })

        #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceClass.html
        #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceSize.html#aws_cdk.aws_ec2.InstanceSize
        ec2_instance_type = aws_ec2.InstanceType.of(
            aws_ec2.InstanceClass.BURSTABLE3, aws_ec2.InstanceSize.MEDIUM)

        sg_bastion_host = aws_ec2.SecurityGroup(
            self,
            "BastionHostSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for a bastion host',
            security_group_name='bastion-host-sg')
        cdk.Tags.of(sg_bastion_host).add('Name', 'bastion-host-sg')

        #XXX: As there are no SSH public keys deployed on this machine,
        # you need to use EC2 Instance Connect with the command
        #  'aws ec2-instance-connect send-ssh-public-key' to provide your SSH public key.
        # https://aws.amazon.com/de/blogs/compute/new-using-amazon-ec2-instance-connect-for-ssh-access-to-your-ec2-instances/
        bastion_host = aws_ec2.BastionHostLinux(
            self,
            "BastionHost",
            vpc=vpc,
            instance_type=ec2_instance_type,
            subnet_selection=aws_ec2.SubnetSelection(
                subnet_type=aws_ec2.SubnetType.PUBLIC),
            security_group=sg_bastion_host)

        #TODO: SHOULD restrict the IP range allowed SSH access
        bastion_host.allow_ssh_access_from(aws_ec2.Peer.ipv4("0.0.0.0/0"))
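        # For example, the rule above could be narrowed to a known office or VPN
        # range instead of 0.0.0.0/0 (203.0.113.0/24 is an illustrative CIDR):
        #   bastion_host.allow_ssh_access_from(aws_ec2.Peer.ipv4("203.0.113.0/24"))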

        sg_use_es = aws_ec2.SecurityGroup(
            self,
            "ElasticSearchClientSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for an elasticsearch client',
            security_group_name='use-es-cluster-sg')
        cdk.Tags.of(sg_use_es).add('Name', 'use-es-cluster-sg')

        sg_es = aws_ec2.SecurityGroup(
            self,
            "ElasticSearchSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for an elasticsearch cluster',
            security_group_name='es-cluster-sg')
        cdk.Tags.of(sg_es).add('Name', 'es-cluster-sg')

        sg_es.add_ingress_rule(peer=sg_es,
                               connection=aws_ec2.Port.all_tcp(),
                               description='es-cluster-sg')
        sg_es.add_ingress_rule(peer=sg_use_es,
                               connection=aws_ec2.Port.all_tcp(),
                               description='use-es-cluster-sg')
        sg_es.add_ingress_rule(peer=sg_bastion_host,
                               connection=aws_ec2.Port.all_tcp(),
                               description='bastion-host-sg')

        #XXX: aws cdk elasticsearch example - https://github.com/aws/aws-cdk/issues/2873
        ES_DOMAIN_NAME = self.node.try_get_context("es_domain_name")
        es_cfn_domain = aws_elasticsearch.CfnDomain(
            self,
            "ElasticSearch",
            elasticsearch_cluster_config={
                "dedicatedMasterCount": 3,
                "dedicatedMasterEnabled": True,
                "dedicatedMasterType": "t2.medium.elasticsearch",
                "instanceCount": 2,
                "instanceType": "t2.medium.elasticsearch",
                "zoneAwarenessEnabled": True
            },
            ebs_options={
                "ebsEnabled": True,
                "volumeSize": 10,
                "volumeType": "gp2"
            },
            domain_name=ES_DOMAIN_NAME,
            elasticsearch_version="7.10",
            encryption_at_rest_options={"enabled": False},
            access_policies={
                "Version":
                "2012-10-17",
                "Statement": [{
                    "Effect":
                    "Allow",
                    "Principal": {
                        "AWS": "*"
                    },
                    "Action":
                    ["es:Describe*", "es:List*", "es:Get*", "es:ESHttp*"],
                    "Resource":
                    self.format_arn(
                        service="es",
                        resource="domain",
                        resource_name="{}/*".format(ES_DOMAIN_NAME))
                }]
            },
            snapshot_options={"automatedSnapshotStartHour": 17},
            vpc_options={
                "securityGroupIds": [sg_es.security_group_id],
                "subnetIds":
                vpc.select_subnets(
                    subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT).subnet_ids
            })
        cdk.Tags.of(es_cfn_domain).add('Name', ES_DOMAIN_NAME)

        S3_BUCKET_SUFFIX = ''.join(
            random.sample((string.ascii_lowercase + string.digits), k=7))
        s3_bucket = s3.Bucket(
            self,
            "s3bucket",
            removal_policy=cdk.RemovalPolicy.
            DESTROY,  #XXX: Default: cdk.RemovalPolicy.RETAIN - The bucket will be orphaned
            bucket_name="ekk-stack-{region}-{suffix}".format(
                region=cdk.Aws.REGION, suffix=S3_BUCKET_SUFFIX))

        firehose_role_policy_doc = aws_iam.PolicyDocument()
        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    "resources": [
                        s3_bucket.bucket_arn, "{}/*".format(
                            s3_bucket.bucket_arn)
                    ],
                    "actions": [
                        "s3:AbortMultipartUpload", "s3:GetBucketLocation",
                        "s3:GetObject", "s3:ListBucket",
                        "s3:ListBucketMultipartUploads", "s3:PutObject"
                    ]
                }))

        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                effect=aws_iam.Effect.ALLOW,
                resources=["*"],
                actions=[
                    "ec2:DescribeVpcs", "ec2:DescribeVpcAttribute",
                    "ec2:DescribeSubnets", "ec2:DescribeSecurityGroups",
                    "ec2:DescribeNetworkInterfaces",
                    "ec2:CreateNetworkInterface",
                    "ec2:CreateNetworkInterfacePermission",
                    "ec2:DeleteNetworkInterface"
                ]))

        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=[
                                        es_cfn_domain.attr_arn,
                                        "{}/*".format(es_cfn_domain.attr_arn)
                                    ],
                                    actions=[
                                        "es:DescribeElasticsearchDomain",
                                        "es:DescribeElasticsearchDomains",
                                        "es:DescribeElasticsearchDomainConfig",
                                        "es:ESHttpPost", "es:ESHttpPut"
                                    ]))

        ES_INDEX_NAME = self.node.try_get_context("es_index_name")

        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                effect=aws_iam.Effect.ALLOW,
                resources=[
                    es_cfn_domain.attr_arn,
                    "{}/*".format(es_cfn_domain.attr_arn)
                ],
                # resources=[
                #   "{aes_arn}/_all/_settings".format(aes_arn=es_cfn_domain.attr_arn),
                #   "{aes_arn}/_cluster/stats".format(aes_arn=es_cfn_domain.attr_arn),
                #   "{aes_arn}/{es_index_name}*/_mapping".format(aes_arn=es_cfn_domain.attr_arn, es_index_name=ES_INDEX_NAME),
                #   "{aes_arn}/_nodes".format(aes_arn=es_cfn_domain.attr_arn),
                #   "{aes_arn}/_nodes/*/stats".format(aes_arn=es_cfn_domain.attr_arn),
                #   "{aes_arn}/_stats".format(aes_arn=es_cfn_domain.attr_arn),
                #   "{aes_arn}/{es_index_name}*/_stats".format(aes_arn=es_cfn_domain.attr_arn, es_index_name=ES_INDEX_NAME)
                # ],
                actions=["es:ESHttpGet"]))

        firehose_log_group_name = "/aws/kinesisfirehose/{}".format(
            ES_INDEX_NAME)
        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                effect=aws_iam.Effect.ALLOW,
                #XXX: The ARN will be formatted as follows:
                # arn:{partition}:{service}:{region}:{account}:{resource}{sep}{resource-name}
                resources=[
                    self.format_arn(
                        service="logs",
                        resource="log-group",
                        resource_name="{}:log-stream:*".format(
                            firehose_log_group_name),
                        arn_format=cdk.ArnFormat.COLON_RESOURCE_NAME)
                ],
                actions=["logs:PutLogEvents"]))

        firehose_role = aws_iam.Role(
            self,
            "KinesisFirehoseServiceRole",
            role_name="KinesisFirehoseServiceRole-{es_index}-{region}".format(
                es_index=ES_INDEX_NAME, region=cdk.Aws.REGION),
            assumed_by=aws_iam.ServicePrincipal("firehose.amazonaws.com"),
            #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
            inline_policies={"firehose_role_policy": firehose_role_policy_doc})

        es_dest_vpc_config = aws_kinesisfirehose.CfnDeliveryStream.VpcConfigurationProperty(
            role_arn=firehose_role.role_arn,
            security_group_ids=[sg_use_es.security_group_id],
            subnet_ids=vpc.select_subnets(
                subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT).subnet_ids)

        es_dest_config = aws_kinesisfirehose.CfnDeliveryStream.ElasticsearchDestinationConfigurationProperty(
            index_name=ES_INDEX_NAME,
            role_arn=firehose_role.role_arn,
            s3_configuration={
                "bucketArn": s3_bucket.bucket_arn,
                "bufferingHints": {
                    "intervalInSeconds": 60,
                    "sizeInMBs": 1
                },
                "cloudWatchLoggingOptions": {
                    "enabled": True,
                    "logGroupName": firehose_log_group_name,
                    "logStreamName": "S3Backup"
                },
                "compressionFormat":
                "UNCOMPRESSED",  # [GZIP | HADOOP_SNAPPY | Snappy | UNCOMPRESSED | ZIP]
                # Kinesis Data Firehose automatically appends the “YYYY/MM/dd/HH/” UTC prefix to delivered S3 files. You can also specify
                # an extra prefix in front of the time format and add "/" to the end to have it appear as a folder in the S3 console.
                "prefix": "{}/".format(ES_INDEX_NAME),
                "roleArn": firehose_role.role_arn
            },
            buffering_hints={
                "intervalInSeconds": 60,
                "sizeInMBs": 1
            },
            cloud_watch_logging_options={
                "enabled": True,
                "logGroupName": firehose_log_group_name,
                "logStreamName": "ElasticsearchDelivery"
            },
            domain_arn=es_cfn_domain.attr_arn,
            index_rotation_period=
            "NoRotation",  # [NoRotation | OneDay | OneHour | OneMonth | OneWeek]
            retry_options={"durationInSeconds": 60},
            s3_backup_mode=
            "FailedDocumentsOnly",  # [AllDocuments | FailedDocumentsOnly]
            vpc_configuration=es_dest_vpc_config)

        firehose_to_es_delivery_stream = aws_kinesisfirehose.CfnDeliveryStream(
            self,
            "KinesisFirehoseToES",
            delivery_stream_name=ES_INDEX_NAME,
            delivery_stream_type="DirectPut",
            elasticsearch_destination_configuration=es_dest_config,
            tags=[{
                "key": "Name",
                "value": ES_DOMAIN_NAME
            }])
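# --- Usage sketch (not part of the stack above) ---
# A minimal, illustrative way to push a batch of test log records into the
# DirectPut delivery stream defined above. "retail" stands in for the
# es_index_name context value actually used as the delivery stream name, and
# local AWS credentials for boto3 are assumed.
import json

import boto3

firehose_client = boto3.client("firehose")
records = [{
    "Data": (json.dumps({"level": "INFO", "message": f"test event {i}"}) + "\n").encode("utf-8")
} for i in range(5)]
firehose_client.put_record_batch(DeliveryStreamName="retail", Records=records)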