Example #1
    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        MEMORYDB_USER_NAME = cdk.CfnParameter(
            self,
            'MemoryDBUserName',
            type='String',
            description='memory db user name',
            default='memdb-admin')

        MEMORYDB_USER_PASSWORD = cdk.CfnParameter(
            self,
            'MemoryDBUserPassword',
            type='String',
            description='memory db user password (16~128 printable characters)'
        )

        memorydb_user = aws_memorydb.CfnUser(
            self,
            'MemoryDBUser',
            user_name=MEMORYDB_USER_NAME.value_as_string,
            # refer to https://redis.io/topics/acl
            access_string='on ~* &* +@all',
            # refer to https://docs.aws.amazon.com/cli/latest/reference/memorydb/create-user.html
            authentication_mode={
                "Type": "password",
                "Passwords": [MEMORYDB_USER_PASSWORD.value_as_string]
            })

        self.memorydb_acl = aws_memorydb.CfnACL(
            self,
            'MemoryDBAcl',
            acl_name='my-memorydb-acl',
            user_names=[memorydb_user.user_name])

        cdk.CfnOutput(self,
                      'MemoryDBACL',
                      value=self.memorydb_acl.acl_name,
                      export_name='MemoryDBACL')
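
        #XXX: Illustrative sketch: one way the ACL above could be attached to a
        # MemoryDB cluster. The subnet IDs, node type and cluster name are
        # assumptions; replace them with real values before uncommenting.
        #
        # memorydb_subnet_group = aws_memorydb.CfnSubnetGroup(
        #     self,
        #     'MemoryDBSubnetGroup',
        #     subnet_group_name='my-memorydb-subnet-group',
        #     subnet_ids=['subnet-11111111', 'subnet-22222222'])
        #
        # memorydb_cluster = aws_memorydb.CfnCluster(
        #     self,
        #     'MemoryDBCluster',
        #     cluster_name='my-memorydb-cluster',
        #     node_type='db.t4g.small',
        #     acl_name=self.memorydb_acl.acl_name,
        #     subnet_group_name=memorydb_subnet_group.subnet_group_name,
        #     tls_enabled=True)
        # # acl_name is a literal string, so declare the dependency explicitly
        # memorydb_cluster.add_depends_on(self.memorydb_acl)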
Example #2
  def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
    super().__init__(scope, construct_id, **kwargs)

    s3_bucket_name = cdk.CfnParameter(self, 'S3BucketForStaticContents',
      type='String',
      description='s3 bucket that the site contents are deployed to'
    )

    site_bucket = s3.Bucket.from_bucket_name(self, 'S3BucketForStaticSite', s3_bucket_name.value_as_string)

    cloudfrontOAI = cloudfront.OriginAccessIdentity(self, 'CloudFrontOAI',
      comment="Allows CloudFront to reach the bucket: {name}".format(name=s3_bucket_name.value_as_string)
    )
    cloudfrontOAI.apply_removal_policy(cdk.RemovalPolicy.DESTROY)

    #XXX: Add a policy document to the existing S3 bucket
    #XXX: https://stackoverflow.com/questions/60087302/how-to-add-resource-policy-to-existing-s3-bucket-with-cdk-in-javascript
    site_bucket_policy_statement = aws_iam.PolicyStatement(**{
      'actions': ['s3:GetObject'],
      'resources': [site_bucket.arn_for_objects('*')],
      'principals': [aws_iam.CanonicalUserPrincipal(cloudfrontOAI.cloud_front_origin_access_identity_s3_canonical_user_id)]
    })

    s3.CfnBucketPolicy(self, 'SiteBucketPolicy',
      bucket=site_bucket.bucket_name,
      policy_document=aws_iam.PolicyDocument(statements=[site_bucket_policy_statement])
    )

    distribution = cloudfront.Distribution(self, "myDist",
      default_behavior=cloudfront.BehaviorOptions(
        origin=cf_origins.S3Origin(bucket=site_bucket, origin_access_identity=cloudfrontOAI)
      ),
      error_responses=[
        #XXX: If you have accessed root page of cloudfront url (i.e. https://your-domain.cloudfront.net/),
        #XXX: 403:Forbidden error might occur. In order to prevent this error,
        #XXX: configure 403:Forbidden error response page to be 'index.html'
        cloudfront.ErrorResponse(http_status=403, response_http_status=200,
          response_page_path='/index.html', ttl=cdk.Duration.seconds(10)),
        #XXX: Configure 404:NotFound error response page to be 'error.html'
        cloudfront.ErrorResponse(http_status=404, response_http_status=404,
          response_page_path='/error.html', ttl=cdk.Duration.seconds(10))
      ]
    )

    cdk.CfnOutput(self, 'StackName', value=self.stack_name, export_name='StackName')
    cdk.CfnOutput(self, 'SiteBucket', value=site_bucket.bucket_name, export_name='SiteBucket')
    cdk.CfnOutput(self, 'DistributionId', value=distribution.distribution_id, export_name='DistributionId')
    cdk.CfnOutput(self, 'DistributionDomainName', value=distribution.distribution_domain_name, export_name='DistributionDomainName')
    cdk.CfnOutput(self, 'CloudFrontOriginAccessId', value=cloudfrontOAI.cloud_front_origin_access_identity_s3_canonical_user_id, export_name='CloudFrontOAI')
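
    #XXX: Illustrative sketch: deploying the static contents and invalidating the
    # CloudFront cache on each deploy. It assumes an extra import,
    # `from aws_cdk import aws_s3_deployment as s3_deployment`, and a local
    # './site-contents' directory; both are assumptions, not part of this stack.
    #
    # s3_deployment.BucketDeployment(self, 'DeployStaticContents',
    #   sources=[s3_deployment.Source.asset('./site-contents')],
    #   destination_bucket=site_bucket,
    #   distribution=distribution,
    #   distribution_paths=['/*']
    # )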
Example #3
    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        EC2_KEY_PAIR_NAME = cdk.CfnParameter(
            self,
            'EC2KeyPairName',
            type='String',
            description='Amazon EC2 Instance KeyPair name')

        vpc_name = self.node.try_get_context("vpc_name")
        vpc = aws_ec2.Vpc.from_lookup(self,
                                      "ExistingVPC",
                                      is_default=True,
                                      vpc_name=vpc_name)

        # vpc = aws_ec2.Vpc(self, "Ec2WithPemKeyStackVPC",
        #   max_azs=2,
        #   gateway_endpoints={
        #     "S3": aws_ec2.GatewayVpcEndpointOptions(
        #       service=aws_ec2.GatewayVpcEndpointAwsService.S3
        #     )
        #   }
        # )

        #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceClass.html
        #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceSize.html#aws_cdk.aws_ec2.InstanceSize
        ec2_instance_type = aws_ec2.InstanceType.of(
            aws_ec2.InstanceClass.BURSTABLE3, aws_ec2.InstanceSize.MEDIUM)

        sg_bastion_host = aws_ec2.SecurityGroup(
            self,
            "BastionHostSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for a bastion host',
            security_group_name='bastion-host-sg')
        cdk.Tags.of(sg_bastion_host).add('Name', 'bastion-host-sg')

        #TODO: SHOULD restrict the IP range allowed for SSH access (one option is sketched below)
        sg_bastion_host.add_ingress_rule(peer=aws_ec2.Peer.ipv4("0.0.0.0/0"),
                                         connection=aws_ec2.Port.tcp(22),
                                         description='SSH access')
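
        #XXX: Illustrative sketch: restricting the SSH ingress above to a
        # caller-supplied CIDR instead of 0.0.0.0/0. The parameter name
        # 'SSHAllowedCIDR' is an assumption.
        #
        # SSH_ALLOWED_CIDR = cdk.CfnParameter(
        #     self,
        #     'SSHAllowedCIDR',
        #     type='String',
        #     description='IP address range (CIDR) allowed to SSH into the bastion host',
        #     default='0.0.0.0/0')
        # sg_bastion_host.add_ingress_rule(
        #     peer=aws_ec2.Peer.ipv4(SSH_ALLOWED_CIDR.value_as_string),
        #     connection=aws_ec2.Port.tcp(22),
        #     description='SSH access')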

        bastion_host = aws_ec2.Instance(
            self,
            "BastionHost",
            vpc=vpc,
            instance_type=ec2_instance_type,
            machine_image=aws_ec2.MachineImage.latest_amazon_linux(),
            vpc_subnets=aws_ec2.SubnetSelection(
                subnet_type=aws_ec2.SubnetType.PUBLIC),
            security_group=sg_bastion_host,
            key_name=EC2_KEY_PAIR_NAME.value_as_string)

        cdk.CfnOutput(self,
                      'BastionHostId',
                      value=bastion_host.instance_id,
                      export_name='BastionHostId')
        cdk.CfnOutput(self,
                      'BastionHostPublicDNSName',
                      value=bastion_host.instance_public_dns_name,
                      export_name='BastionHostPublicDNSName')
Example #4
    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        # vpc_name = self.node.try_get_context("vpc_name")
        # vpc = aws_ec2.Vpc.from_lookup(self, "ExistingVPC",
        #   is_default=True,
        #   vpc_name=vpc_name)
        vpc = aws_ec2.Vpc(
            self,
            "FirehoseToS3VPC",
            max_azs=2,
            gateway_endpoints={
                "S3":
                aws_ec2.GatewayVpcEndpointOptions(
                    service=aws_ec2.GatewayVpcEndpointAwsService.S3)
            })

        S3_BUCKET_SUFFIX = ''.join(
            random.sample((string.ascii_lowercase + string.digits), k=7))
        s3_bucket = s3.Bucket(
            self,
            "s3bucket",
            removal_policy=cdk.RemovalPolicy.
            DESTROY,  #XXX: Default: cdk.RemovalPolicy.RETAIN - The bucket will be orphaned
            bucket_name="firehose-to-s3-{region}-{suffix}".format(
                region=cdk.Aws.REGION, suffix=S3_BUCKET_SUFFIX))

        FIREHOSE_STREAM_NAME = cdk.CfnParameter(
            self,
            'FirehoseStreamName',
            type='String',
            description='kinesis data firehose stream name',
            default='PUT-S3-{}'.format(''.join(
                random.sample((string.ascii_letters), k=5))))

        FIREHOSE_BUFFER_SIZE = cdk.CfnParameter(
            self,
            'FirehoseBufferSize',
            type='Number',
            description='kinesis data firehose buffer size',
            min_value=1,
            max_value=128,
            default=128)

        FIREHOSE_BUFFER_INTERVAL = cdk.CfnParameter(
            self,
            'FirehoseBufferInterval',
            type='Number',
            description='kinesis data firehose buffer interval',
            min_value=60,
            max_value=300,
            default=60)

        FIREHOSE_LAMBDA_BUFFER_SIZE = cdk.CfnParameter(
            self,
            'FirehoseLambdaBufferSize',
            type='Number',
            description=
            'kinesis data firehose buffer size for AWS Lambda to transform records',
            min_value=1,
            max_value=3,
            default=3)

        FIREHOSE_LAMBDA_BUFFER_INTERVAL = cdk.CfnParameter(
            self,
            'FirehoseLambdaBufferInterval',
            type='Number',
            description=
            'kinesis data firehose buffer interval for AWS Lambda to transform records',
            min_value=60,
            max_value=900,
            default=300)

        FIREHOSE_LAMBDA_NUMBER_OF_RETRIES = cdk.CfnParameter(
            self,
            'FirehoseLambdaNumberOfRetries',
            type='Number',
            description=
            'Number of retries for AWS Lambda to transform records in kinesis data firehose',
            min_value=1,
            max_value=5,
            default=3)

        FIREHOSE_TO_S3_PREFIX = cdk.CfnParameter(
            self,
            'FirehosePrefix',
            type='String',
            description='kinesis data firehose S3 prefix')

        FIREHOSE_TO_S3_ERROR_OUTPUT_PREFIX = cdk.CfnParameter(
            self,
            'FirehoseErrorOutputPrefix',
            type='String',
            description='kinesis data firehose S3 error output prefix',
            default=
            'error/year=!{timestamp:yyyy}/month=!{timestamp:MM}/day=!{timestamp:dd}/hour=!{timestamp:HH}/!{firehose:error-output-type}'
        )

        METADATA_EXTRACT_LAMBDA_FN_NAME = "MetadataExtractor"
        metadata_extract_lambda_fn = aws_lambda.Function(
            self,
            "MetadataExtractor",
            runtime=aws_lambda.Runtime.PYTHON_3_7,
            function_name="MetadataExtractor",
            handler="metadata_extractor.lambda_handler",
            description="Extract partition keys from records",
            code=aws_lambda.Code.from_asset(
                os.path.join(os.path.dirname(__file__), 'src/main/python')),
            timeout=cdk.Duration.minutes(5))

        log_group = aws_logs.LogGroup(
            self,
            "MetadataExtractorLogGroup",
            #XXX: Circular dependency between resources occurs
            # if aws_lambda.Function.function_name is used
            # instead of literal name of lambda function such as "MetadataExtractor"
            log_group_name="/aws/lambda/{}".format(
                METADATA_EXTRACT_LAMBDA_FN_NAME),
            retention=aws_logs.RetentionDays.THREE_DAYS,
            removal_policy=cdk.RemovalPolicy.DESTROY)
        log_group.grant_write(metadata_extract_lambda_fn)

        firehose_role_policy_doc = aws_iam.PolicyDocument()

        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    "resources": [
                        s3_bucket.bucket_arn, "{}/*".format(
                            s3_bucket.bucket_arn)
                    ],
                    "actions": [
                        "s3:AbortMultipartUpload", "s3:GetBucketLocation",
                        "s3:GetObject", "s3:ListBucket",
                        "s3:ListBucketMultipartUploads", "s3:PutObject"
                    ]
                }))

        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                effect=aws_iam.Effect.ALLOW,
                resources=["*"],
                actions=[
                    "ec2:DescribeVpcs", "ec2:DescribeVpcAttribute",
                    "ec2:DescribeSubnets", "ec2:DescribeSecurityGroups",
                    "ec2:DescribeNetworkInterfaces",
                    "ec2:CreateNetworkInterface",
                    "ec2:CreateNetworkInterfacePermission",
                    "ec2:DeleteNetworkInterface"
                ]))

        #XXX: https://docs.aws.amazon.com/ko_kr/cdk/latest/guide/tokens.html
        # String-encoded tokens:
        #  Avoid manipulating the string in other ways. For example,
        #  taking a substring of a string is likely to break the string token.
        firehose_log_group_name = f"/aws/kinesisfirehose/{FIREHOSE_STREAM_NAME.value_as_string}"
        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                effect=aws_iam.Effect.ALLOW,
                #XXX: The ARN will be formatted as follows:
                # arn:{partition}:{service}:{region}:{account}:{resource}{sep}{resource-name}
                resources=[
                    self.format_arn(
                        service="logs",
                        resource="log-group",
                        resource_name="{}:log-stream:*".format(
                            firehose_log_group_name),
                        arn_format=cdk.ArnFormat.COLON_RESOURCE_NAME)
                ],
                actions=["logs:PutLogEvents"]))

        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    #XXX: The ARN will be formatted as follows:
                    # arn:{partition}:{service}:{region}:{account}:{resource}{sep}{resource-name}
                    "resources": [
                        self.format_arn(
                            partition="aws",
                            service="lambda",
                            region=cdk.Aws.REGION,
                            account=cdk.Aws.ACCOUNT_ID,
                            resource="function",
                            resource_name="{}:*".format(
                                metadata_extract_lambda_fn.function_name),
                            arn_format=cdk.ArnFormat.COLON_RESOURCE_NAME)
                    ],
                    "actions": [
                        "lambda:InvokeFunction",
                        "lambda:GetFunctionConfiguration"
                    ]
                }))

        firehose_role = aws_iam.Role(
            self,
            "KinesisFirehoseServiceRole",
            role_name="KinesisFirehoseServiceRole-{stream_name}-{region}".
            format(stream_name=FIREHOSE_STREAM_NAME.value_as_string,
                   region=cdk.Aws.REGION),
            assumed_by=aws_iam.ServicePrincipal("firehose.amazonaws.com"),
            path='/service-role/',
            #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
            inline_policies={"firehose_role_policy": firehose_role_policy_doc})

        lambda_proc = cfn.ProcessorProperty(
            type="Lambda",
            parameters=[
                cfn.ProcessorParameterProperty(
                    parameter_name="LambdaArn",
                    parameter_value='{}:{}'.format(
                        metadata_extract_lambda_fn.function_arn,
                        metadata_extract_lambda_fn.current_version.version)),
                cfn.ProcessorParameterProperty(
                    parameter_name="NumberOfRetries",
                    parameter_value=FIREHOSE_LAMBDA_NUMBER_OF_RETRIES.
                    value_as_string),
                cfn.ProcessorParameterProperty(
                    parameter_name="RoleArn",
                    parameter_value=firehose_role.role_arn),
                cfn.ProcessorParameterProperty(
                    parameter_name="BufferSizeInMBs",
                    parameter_value=FIREHOSE_LAMBDA_BUFFER_SIZE.value_as_string
                ),
                cfn.ProcessorParameterProperty(
                    parameter_name="BufferIntervalInSeconds",
                    parameter_value=FIREHOSE_LAMBDA_BUFFER_INTERVAL.
                    value_as_string)
            ])

        record_deaggregation_proc = cfn.ProcessorProperty(
            type="RecordDeAggregation",
            parameters=[
                cfn.ProcessorParameterProperty(parameter_name="SubRecordType",
                                               parameter_value="JSON")
            ])

        #XXX: Adding a new line delimiter when delivering data to S3
        # This is also particularly useful when dynamic partitioning is applied to aggregated data
        # because multirecord deaggregation (which must be applied to aggregated data
        # before it can be dynamically partitioned) removes new lines from records as part of the parsing process.
        # https://docs.aws.amazon.com/firehose/latest/dev/dynamic-partitioning.html#dynamic-partitioning-new-line-delimiter
        append_delim_to_record_proc = cfn.ProcessorProperty(
            type="AppendDelimiterToRecord", parameters=[])

        firehose_processing_config = cfn.ProcessingConfigurationProperty(
            enabled=True,
            processors=[
                record_deaggregation_proc, append_delim_to_record_proc,
                lambda_proc
            ])

        ext_s3_dest_config = cfn.ExtendedS3DestinationConfigurationProperty(
            bucket_arn=s3_bucket.bucket_arn,
            role_arn=firehose_role.role_arn,
            buffering_hints={
                "intervalInSeconds": FIREHOSE_BUFFER_INTERVAL.value_as_number,
                "sizeInMBs": FIREHOSE_BUFFER_SIZE.value_as_number
            },
            cloud_watch_logging_options={
                "enabled": True,
                "logGroupName": firehose_log_group_name,
                "logStreamName": "DestinationDelivery"
            },
            compression_format=
            "UNCOMPRESSED",  # [GZIP | HADOOP_SNAPPY | Snappy | UNCOMPRESSED | ZIP]
            data_format_conversion_configuration={"enabled": False},
            dynamic_partitioning_configuration={
                "enabled": True,
                "retryOptions": {
                    "durationInSeconds": 300
                }
            },
            error_output_prefix=FIREHOSE_TO_S3_ERROR_OUTPUT_PREFIX.
            value_as_string,
            prefix=FIREHOSE_TO_S3_PREFIX.value_as_string,
            processing_configuration=firehose_processing_config)

        firehose_to_s3_delivery_stream = aws_kinesisfirehose.CfnDeliveryStream(
            self,
            "FirehoseToS3",
            delivery_stream_name=FIREHOSE_STREAM_NAME.value_as_string,
            delivery_stream_type="DirectPut",
            extended_s3_destination_configuration=ext_s3_dest_config,
            tags=[{
                "key": "Name",
                "value": FIREHOSE_STREAM_NAME.value_as_string
            }])

        cdk.CfnOutput(self,
                      'StackName',
                      value=self.stack_name,
                      export_name='StackName')
        cdk.CfnOutput(self,
                      '{}_S3DestBucket'.format(self.stack_name),
                      value=s3_bucket.bucket_name,
                      export_name='S3DestBucket')
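
        #XXX: Illustrative sketch: a producer sending a record to the delivery stream
        # once the stack has been deployed. It assumes boto3 is available; the stream
        # name shown stands in for the FirehoseStreamName parameter value.
        #
        # import json
        # import boto3
        #
        # firehose_client = boto3.client('firehose')
        # firehose_client.put_record(
        #     DeliveryStreamName='PUT-S3-abcde',  # value of the FirehoseStreamName parameter
        #     Record={'Data': (json.dumps({'type': 'test', 'region': 'us-east-1'}) + '\n').encode('utf-8')})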
Example #5
    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        JENKINS_USER = cdk.CfnParameter(self,
                                        'JenkinsUser',
                                        type='String',
                                        description='Jenkins user name')

        JENKINS_API_TOKEN = cdk.CfnParameter(
            self,
            'JenkinsAPIToken',
            type='String',
            description='Jenkins user api token',
            no_echo=True)

        JENKINS_URL = cdk.CfnParameter(self,
                                       'JenkinsUrl',
                                       type='String',
                                       description='Jenkins url')

        secret_value = aws_secretsmanager.SecretStringValueBeta1.from_token(
            json.dumps({
                JENKINS_USER.value_as_string:
                JENKINS_API_TOKEN.value_as_string
            }))

        jenkins_api_user_token_secret = aws_secretsmanager.Secret(
            self,
            "JenkinsAPIUserTokenSecret",
            secret_string_beta1=secret_value,
            description=
            "Secret to store jenkins username and personal access token")

        jenkins_trigger_lambda_fn = aws_lambda.Function(
            self,
            "LambdaJenkinsTrigger",
            runtime=aws_lambda.Runtime.PYTHON_3_8,
            function_name="SageMakerJenkins-LambdaJenkinsTrigger",
            handler="lambda_jenkins_trigger.lambda_handler",
            description=
            "Lambda function invoked by SageMaker Model Package State change",
            code=aws_lambda.Code.from_asset(
                os.path.join(os.path.dirname(__file__), 'src/main/python')),
            environment={
                "JenkinsAPIUserTokenSecret":
                jenkins_api_user_token_secret.secret_name,
                "JenkinsUrl": JENKINS_URL.value_as_string,
            },
            timeout=cdk.Duration.minutes(5))

        jenkins_trigger_lambda_fn.add_to_role_policy(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=["arn:aws:secretsmanager:*:*:*"],
                                    actions=["secretsmanager:GetSecretValue"]))

        jenkins_trigger_lambda_fn.add_to_role_policy(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=["arn:aws:logs:*:*:*"],
                                    actions=["logs:*"]))

        event_rule = aws_events.Rule(
            self,
            "JenkinsTriggerRule",
            rule_name="SageMakerJenkinsTriggerRule",
            event_pattern={
                "account": [self.account],
                "source": ["aws.sagemaker"],
                "detail_type": ["SageMaker Model Package State Change"],
                "detail": {
                    "ModelApprovalStatus": ["Approved", "Rejected"]
                }
            },
            description=
            '''Rule to trigger a deployment when SageMaker Model registry is updated with a new model package.
For example, a new model package is registered with Registry''')
        event_rule.add_target(
            aws_events_targets.LambdaFunction(jenkins_trigger_lambda_fn))
        event_rule.apply_removal_policy(cdk.RemovalPolicy.DESTROY)

        cdk.CfnOutput(self,
                      'JenkinsAPIUserTokenSecretName',
                      value=jenkins_api_user_token_secret.secret_name,
                      export_name='JenkinsAPIUserTokenSecret')
        cdk.CfnOutput(self,
                      'JenkinsTriggerLambdaFunctionName',
                      value=jenkins_trigger_lambda_fn.function_name,
                      export_name='LambdaJenkinsTrigger')
Example #6
    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        SAGEMAKER_NOTEBOOK_INSTANCE_TYPE = cdk.CfnParameter(
            self,
            'SageMakerNotebookInstanceType',
            type='String',
            description='Amazon SageMaker Notebook instance type',
            default='ml.t2.medium')

        #XXX: To deploy this stack into an existing VPC,
        # use the aws_ec2.Vpc.from_lookup(..) below and pass
        # -c vpc_name=your-existing-vpc to the cdk command, for example,
        # cdk -c vpc_name=your-existing-vpc synth
        # (To create a new VPC instead, comment out the lookup and
        # uncomment the aws_ec2.Vpc(..) construct further below.)
        #
        vpc_name = self.node.try_get_context('vpc_name')
        vpc = aws_ec2.Vpc.from_lookup(self,
                                      'ExistingVPC',
                                      is_default=True,
                                      vpc_name=vpc_name)

        #XXX: To use more than 2 AZs, be sure to specify the account and region on your stack.
        #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/Vpc.html
        # vpc = aws_ec2.Vpc(self, 'SageMakerStudioVPC',
        #   max_azs=2,
        #   gateway_endpoints={
        #     "S3": aws_ec2.GatewayVpcEndpointOptions(
        #       service=aws_ec2.GatewayVpcEndpointAwsService.S3
        #     )
        #   }
        # )

        sg_sagemaker_notebook_instance = aws_ec2.SecurityGroup(
            self,
            "SageMakerNotebookSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='Security group with no ingress rule',
            security_group_name='sagemaker-nb-{}-sg'.format(''.join(
                random.sample((string.ascii_letters), k=5))))
        sg_sagemaker_notebook_instance.add_ingress_rule(
            peer=sg_sagemaker_notebook_instance,
            connection=aws_ec2.Port.all_traffic(),
            description='sagemaker notebook security group')
        cdk.Tags.of(sg_sagemaker_notebook_instance).add(
            'Name', 'sagemaker-nb-sg')

        sagemaker_notebook_role_policy_doc = aws_iam.PolicyDocument()
        sagemaker_notebook_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    "resources": ["arn:aws:s3:::*"],
                    "actions": [
                        "s3:GetObject", "s3:PutObject", "s3:DeleteObject",
                        "s3:ListBucket"
                    ]
                }))

        sagemaker_notebook_role = aws_iam.Role(
            self,
            'SageMakerNotebookRole',
            role_name='SageMakerNotebookRole-{suffix}'.format(
                suffix=''.join(random.sample((string.ascii_letters), k=5))),
            assumed_by=aws_iam.ServicePrincipal('sagemaker.amazonaws.com'),
            inline_policies={
                'sagemaker-custome-execution-role':
                sagemaker_notebook_role_policy_doc
            },
            managed_policies=[
                aws_iam.ManagedPolicy.from_aws_managed_policy_name(
                    'AmazonSageMakerFullAccess'),
                aws_iam.ManagedPolicy.from_aws_managed_policy_name(
                    'AWSCloudFormationReadOnlyAccess')
            ])

        #XXX: skip downloading rds-combined-ca-bundle.pem if not using SSL with a MySQL DB instance
        # https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/CHAP_MySQL.html#MySQL.Concepts.SSLSupport
        sagemaker_nb_lifecycle_content = '''#!/bin/bash
sudo -u ec2-user -i <<'EOF'

echo "export AWS_REGION={AWS_Region}" >> ~/.bashrc
curl -LO https://bitbucket.org/eunjeon/mecab-ko/downloads/mecab-0.996-ko-0.9.2.tar.gz
tar zxfv mecab-0.996-ko-0.9.2.tar.gz
pushd mecab-0.996-ko-0.9.2
./configure
make
make check
sudo make install
sudo ldconfig
mecab -v
mecab-config --version
popd

curl -LO https://bitbucket.org/eunjeon/mecab-ko-dic/downloads/mecab-ko-dic-2.1.1-20180720.tar.gz
tar -zxvf mecab-ko-dic-2.1.1-20180720.tar.gz
pushd mecab-ko-dic-2.1.1-20180720
./autogen.sh
./configure
make
sudo make install
popd

for each in python3 pytorch_latest_p36
do
    source /home/ec2-user/anaconda3/bin/activate ${{each}}
    pip install --upgrade pretty_errors
    pip install --upgrade pandas-profiling[notebook]
    pip install --upgrade ipython-sql
    pip install --upgrade PyMySQL
    pip install torchvision
    pip install torchtext
    pip install spacy
    pip install nltk
    pip install requests
    pip install mecab-python
    pip install konlpy
    pip install jpype1-py3
    conda deactivate
done
EOF
'''.format(AWS_Region=cdk.Aws.REGION)

        sagemaker_lifecycle_config_prop = aws_sagemaker.CfnNotebookInstanceLifecycleConfig.NotebookInstanceLifecycleHookProperty(
            content=cdk.Fn.base64(sagemaker_nb_lifecycle_content))

        sagemaker_lifecycle_config = aws_sagemaker.CfnNotebookInstanceLifecycleConfig(
            self,
            'SageMakerNotebookLifeCycleConfig',
            notebook_instance_lifecycle_config_name=
            'SageMakerNotebookLifeCycleConfig',
            on_start=[sagemaker_lifecycle_config_prop])

        sagemaker_notebook_instance = aws_sagemaker.CfnNotebookInstance(
            self,
            'SageMakerNotebookInstance',
            instance_type=SAGEMAKER_NOTEBOOK_INSTANCE_TYPE.value_as_string,
            role_arn=sagemaker_notebook_role.role_arn,
            lifecycle_config_name=sagemaker_lifecycle_config.
            notebook_instance_lifecycle_config_name,
            notebook_instance_name='MySageMakerWorkbook',
            root_access='Disabled',
            security_group_ids=[
                sg_sagemaker_notebook_instance.security_group_id
            ],
            subnet_id=vpc.select_subnets(
                subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT).subnet_ids[0])
Example #7
    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        OPENSEARCH_DOMAIN_NAME = cdk.CfnParameter(
            self,
            'OpenSearchDomainName',
            type='String',
            description='Amazon OpenSearch Service domain name',
            default='opensearch-{}'.format(''.join(
                random.sample((string.ascii_letters), k=5))),
            allowed_pattern='[a-z]+[A-Za-z0-9\-]+')

        OPENSEARCH_INDEX_NAME = cdk.CfnParameter(
            self,
            'SearchIndexName',
            type='String',
            description='Amazon OpenSearch Service index name')

        EC2_KEY_PAIR_NAME = cdk.CfnParameter(
            self,
            'EC2KeyPairName',
            type='String',
            description='Amazon EC2 Instance KeyPair name')

        # vpc_name = self.node.try_get_context("vpc_name")
        # vpc = aws_ec2.Vpc.from_lookup(self, "ExistingVPC",
        #   is_default=True,
        #   vpc_name=vpc_name)
        #
        vpc = aws_ec2.Vpc(
            self,
            "EKKStackVPC",
            max_azs=3,
            gateway_endpoints={
                "S3":
                aws_ec2.GatewayVpcEndpointOptions(
                    service=aws_ec2.GatewayVpcEndpointAwsService.S3)
            })

        #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceClass.html
        #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceSize.html#aws_cdk.aws_ec2.InstanceSize
        ec2_instance_type = aws_ec2.InstanceType.of(
            aws_ec2.InstanceClass.BURSTABLE3, aws_ec2.InstanceSize.MEDIUM)

        sg_bastion_host = aws_ec2.SecurityGroup(
            self,
            "BastionHostSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for a bastion host',
            security_group_name='bastion-host-sg')
        cdk.Tags.of(sg_bastion_host).add('Name', 'bastion-host-sg')

        #TODO: SHOULD restrict the IP range allowed for SSH access
        sg_bastion_host.add_ingress_rule(peer=aws_ec2.Peer.ipv4("0.0.0.0/0"),
                                         connection=aws_ec2.Port.tcp(22),
                                         description='SSH access')

        bastion_host = aws_ec2.Instance(
            self,
            "BastionHost",
            vpc=vpc,
            instance_type=ec2_instance_type,
            machine_image=aws_ec2.MachineImage.latest_amazon_linux(),
            vpc_subnets=aws_ec2.SubnetSelection(
                subnet_type=aws_ec2.SubnetType.PUBLIC),
            security_group=sg_bastion_host,
            key_name=EC2_KEY_PAIR_NAME.value_as_string)

        sg_use_opensearch = aws_ec2.SecurityGroup(
            self,
            "OpenSearchClientSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for an opensearch client',
            security_group_name='use-opensearch-cluster-sg')
        cdk.Tags.of(sg_use_opensearch).add('Name', 'use-opensearch-cluster-sg')

        sg_opensearch_cluster = aws_ec2.SecurityGroup(
            self,
            "OpenSearchSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for an opensearch cluster',
            security_group_name='opensearch-cluster-sg')
        cdk.Tags.of(sg_opensearch_cluster).add('Name', 'opensearch-cluster-sg')

        sg_opensearch_cluster.add_ingress_rule(
            peer=sg_opensearch_cluster,
            connection=aws_ec2.Port.all_tcp(),
            description='opensearch-cluster-sg')

        sg_opensearch_cluster.add_ingress_rule(
            peer=sg_use_opensearch,
            connection=aws_ec2.Port.tcp(443),
            description='use-opensearch-cluster-sg')
        sg_opensearch_cluster.add_ingress_rule(
            peer=sg_use_opensearch,
            connection=aws_ec2.Port.tcp_range(9200, 9300),
            description='use-opensearch-cluster-sg')

        sg_opensearch_cluster.add_ingress_rule(
            peer=sg_bastion_host,
            connection=aws_ec2.Port.tcp(443),
            description='bastion-host-sg')
        sg_opensearch_cluster.add_ingress_rule(
            peer=sg_bastion_host,
            connection=aws_ec2.Port.tcp_range(9200, 9300),
            description='bastion-host-sg')

        master_user_secret = aws_secretsmanager.Secret(
            self,
            "OpenSearchMasterUserSecret",
            generate_secret_string=aws_secretsmanager.SecretStringGenerator(
                secret_string_template=json.dumps({"username": "******"}),
                generate_string_key="password",
                # Master password must be at least 8 characters long and contain at least one uppercase letter,
                # one lowercase letter, one number, and one special character.
                password_length=8))

        #XXX: aws cdk elasticsearch example - https://github.com/aws/aws-cdk/issues/2873
        # You should camelCase the property names instead of PascalCase
        opensearch_domain = aws_opensearchservice.Domain(
            self,
            "OpenSearch",
            domain_name=OPENSEARCH_DOMAIN_NAME.value_as_string,
            version=aws_opensearchservice.EngineVersion.OPENSEARCH_1_0,
            #XXX: You cannot use graviton instances with non-graviton instances.
            # Use graviton instances as data nodes or use non-graviton instances as master nodes.
            capacity={
                "master_nodes": 3,
                "master_node_instance_type": "r6g.large.search",
                "data_nodes": 3,
                "data_node_instance_type": "r6g.large.search"
            },
            ebs={
                "volume_size": 10,
                "volume_type": aws_ec2.EbsDeviceVolumeType.GP2
            },
            #XXX: az_count must be equal to vpc subnets count.
            zone_awareness={"availability_zone_count": 3},
            logging={
                "slow_search_log_enabled": True,
                "app_log_enabled": True,
                "slow_index_log_enabled": True
            },
            fine_grained_access_control=aws_opensearchservice.
            AdvancedSecurityOptions(
                master_user_name=master_user_secret.secret_value_from_json(
                    "username").to_string(),
                master_user_password=master_user_secret.secret_value_from_json(
                    "password")),
            # Enforce HTTPS is required when fine-grained access control is enabled.
            enforce_https=True,
            # Node-to-node encryption is required when fine-grained access control is enabled
            node_to_node_encryption=True,
            # Encryption-at-rest is required when fine-grained access control is enabled.
            encryption_at_rest={"enabled": True},
            use_unsigned_basic_auth=True,
            security_groups=[sg_opensearch_cluster],
            automated_snapshot_start_hour=17,  # 2 AM (GMT+9)
            vpc=vpc,
            vpc_subnets=[
                aws_ec2.SubnetSelection(
                    one_per_az=True,
                    subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT)
            ],
            removal_policy=cdk.RemovalPolicy.
            DESTROY  # default: cdk.RemovalPolicy.RETAIN
        )
        cdk.Tags.of(opensearch_domain).add(
            'Name', f'{OPENSEARCH_DOMAIN_NAME.value_as_string}')

        S3_BUCKET_SUFFIX = ''.join(
            random.sample((string.ascii_lowercase + string.digits), k=7))
        s3_bucket = s3.Bucket(
            self,
            "s3bucket",
            removal_policy=cdk.RemovalPolicy.
            DESTROY,  #XXX: Default: cdk.RemovalPolicy.RETAIN - The bucket will be orphaned
            bucket_name="opskk-stack-{region}-{suffix}".format(
                region=cdk.Aws.REGION, suffix=S3_BUCKET_SUFFIX))

        firehose_role_policy_doc = aws_iam.PolicyDocument()
        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    "resources": [
                        s3_bucket.bucket_arn, "{}/*".format(
                            s3_bucket.bucket_arn)
                    ],
                    "actions": [
                        "s3:AbortMultipartUpload", "s3:GetBucketLocation",
                        "s3:GetObject", "s3:ListBucket",
                        "s3:ListBucketMultipartUploads", "s3:PutObject"
                    ]
                }))

        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                effect=aws_iam.Effect.ALLOW,
                resources=["*"],
                actions=[
                    "ec2:DescribeVpcs", "ec2:DescribeVpcAttribute",
                    "ec2:DescribeSubnets", "ec2:DescribeSecurityGroups",
                    "ec2:DescribeNetworkInterfaces",
                    "ec2:CreateNetworkInterface",
                    "ec2:CreateNetworkInterfacePermission",
                    "ec2:DeleteNetworkInterface"
                ]))

        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                effect=aws_iam.Effect.ALLOW,
                resources=[
                    opensearch_domain.domain_arn,
                    "{}/*".format(opensearch_domain.domain_arn)
                ],
                actions=[
                    "es:DescribeElasticsearchDomain",
                    "es:DescribeElasticsearchDomains",
                    "es:DescribeElasticsearchDomainConfig", "es:ESHttpPost",
                    "es:ESHttpPut"
                ]))

        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                effect=aws_iam.Effect.ALLOW,
                #XXX: https://aws.amazon.com/premiumsupport/knowledge-center/kinesis-data-firehose-delivery-failure/
                resources=[
                    opensearch_domain.domain_arn,
                    f"{opensearch_domain.domain_arn}/_all/_settings",
                    f"{opensearch_domain.domain_arn}/_cluster/stats",
                    f"{opensearch_domain.domain_arn}/{OPENSEARCH_INDEX_NAME.value_as_string}*/_mapping/%FIREHOSE_POLICY_TEMPLATE_PLACEHOLDER%",
                    f"{opensearch_domain.domain_arn}/_nodes",
                    f"{opensearch_domain.domain_arn}/_nodes/stats",
                    f"{opensearch_domain.domain_arn}/_nodes/*/stats",
                    f"{opensearch_domain.domain_arn}/_stats",
                    f"{opensearch_domain.domain_arn}/{OPENSEARCH_INDEX_NAME.value_as_string}*/_stats"
                ],
                actions=["es:ESHttpGet"]))

        firehose_log_group_name = f"/aws/kinesisfirehose/{OPENSEARCH_INDEX_NAME.value_as_string}"
        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                effect=aws_iam.Effect.ALLOW,
                #XXX: The ARN will be formatted as follows:
                # arn:{partition}:{service}:{region}:{account}:{resource}{sep}{resource-name}
                resources=[
                    self.format_arn(
                        service="logs",
                        resource="log-group",
                        resource_name="{}:log-stream:*".format(
                            firehose_log_group_name),
                        arn_format=cdk.ArnFormat.COLON_RESOURCE_NAME)
                ],
                actions=["logs:PutLogEvents"]))

        firehose_role = aws_iam.Role(
            self,
            "KinesisFirehoseServiceRole",
            role_name=
            f"KinesisFirehoseServiceRole-{OPENSEARCH_INDEX_NAME.value_as_string}-{cdk.Aws.REGION}",
            assumed_by=aws_iam.ServicePrincipal("firehose.amazonaws.com"),
            #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
            inline_policies={"firehose_role_policy": firehose_role_policy_doc})

        opensearch_dest_vpc_config = aws_kinesisfirehose.CfnDeliveryStream.VpcConfigurationProperty(
            role_arn=firehose_role.role_arn,
            security_group_ids=[sg_use_opensearch.security_group_id],
            subnet_ids=vpc.select_subnets(
                subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT).subnet_ids)

        opensearch_dest_config = aws_kinesisfirehose.CfnDeliveryStream.ElasticsearchDestinationConfigurationProperty(
            index_name=OPENSEARCH_INDEX_NAME.value_as_string,
            role_arn=firehose_role.role_arn,
            s3_configuration={
                "bucketArn": s3_bucket.bucket_arn,
                "bufferingHints": {
                    "intervalInSeconds": 60,
                    "sizeInMBs": 1
                },
                "cloudWatchLoggingOptions": {
                    "enabled": True,
                    "logGroupName": firehose_log_group_name,
                    "logStreamName": "S3Backup"
                },
                "compressionFormat":
                "UNCOMPRESSED",  # [GZIP | HADOOP_SNAPPY | Snappy | UNCOMPRESSED | ZIP]
                # Kinesis Data Firehose automatically appends the “YYYY/MM/dd/HH/” UTC prefix to delivered S3 files. You can also specify
                # an extra prefix in front of the time format and add "/" to the end to have it appear as a folder in the S3 console.
                "prefix": f"{OPENSEARCH_INDEX_NAME.value_as_string}/",
                "roleArn": firehose_role.role_arn
            },
            buffering_hints={
                "intervalInSeconds": 60,
                "sizeInMBs": 1
            },
            cloud_watch_logging_options={
                "enabled": True,
                "logGroupName": firehose_log_group_name,
                "logStreamName": "ElasticsearchDelivery"
            },
            domain_arn=opensearch_domain.domain_arn,
            index_rotation_period=
            "NoRotation",  # [NoRotation | OneDay | OneHour | OneMonth | OneWeek]
            retry_options={"durationInSeconds": 60},
            s3_backup_mode=
            "FailedDocumentsOnly",  # [AllDocuments | FailedDocumentsOnly]
            vpc_configuration=opensearch_dest_vpc_config)

        firehose_to_ops_delivery_stream = aws_kinesisfirehose.CfnDeliveryStream(
            self,
            "KinesisFirehoseToES",
            delivery_stream_name=OPENSEARCH_INDEX_NAME.value_as_string,
            delivery_stream_type="DirectPut",
            elasticsearch_destination_configuration=opensearch_dest_config,
            tags=[{
                "key": "Name",
                "value": OPENSEARCH_INDEX_NAME.value_as_string
            }])

        cdk.CfnOutput(self,
                      'BastionHostId',
                      value=bastion_host.instance_id,
                      export_name='BastionHostId')
        cdk.CfnOutput(self,
                      'OpenSearchDomainEndpoint',
                      value=opensearch_domain.domain_endpoint,
                      export_name='OpenSearchDomainEndpoint')
        cdk.CfnOutput(
            self,
            'OpenSearchDashboardsURL',
            value=f"{opensearch_domain.domain_endpoint}/_dashboards/",
            export_name='OpenSearchDashboardsURL')
        cdk.CfnOutput(self,
                      'MasterUserSecretId',
                      value=master_user_secret.secret_name,
                      export_name='MasterUserSecretId')
        cdk.CfnOutput(self,
                      '{}_S3DestBucket'.format(self.stack_name),
                      value=s3_bucket.bucket_name,
                      export_name='S3DestBucket')
        cdk.CfnOutput(self,
                      'FirehoseRoleArn',
                      value=firehose_role.role_arn,
                      export_name='FirehoseRoleArn')
Example #8
  def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
    super().__init__(scope, construct_id, **kwargs)

    EMR_EC2_KEY_PAIR_NAME = cdk.CfnParameter(self, 'EMREC2KeyPairName',
      type='String',
      description='Amazon EMR EC2 Instance KeyPair name',
      default='emr'
    )

    EMR_CLUSTER_NAME = cdk.CfnParameter(self, 'EMRClusterName',
      type='String',
      description='Amazon EMR Cluster name',
      default='my-emr-cluster'
    )

    vpc_name = self.node.try_get_context("vpc_name")
    vpc = aws_ec2.Vpc.from_lookup(self, "ExistingVPC",
      is_default=True,
      vpc_name=vpc_name)

    # vpc = aws_ec2.Vpc(self, "EMRStackVPC",
    #   max_azs=2,
    #   gateway_endpoints={
    #     "S3": aws_ec2.GatewayVpcEndpointOptions(
    #       service=aws_ec2.GatewayVpcEndpointAwsService.S3
    #     )
    #   }
    # )

    emr_instances = aws_emr.CfnCluster.JobFlowInstancesConfigProperty(
      core_instance_group=aws_emr.CfnCluster.InstanceGroupConfigProperty(
        instance_count=2,
        instance_type="m5.xlarge",
        market="ON_DEMAND"
      ),
      ec2_subnet_id=vpc.public_subnets[0].subnet_id,
      keep_job_flow_alive_when_no_steps=True, # After last step completes: Cluster waits
      master_instance_group=aws_emr.CfnCluster.InstanceGroupConfigProperty(
        instance_count=1,
        instance_type="m5.xlarge",
        market="ON_DEMAND"
      ),
      termination_protected=True
    )

    emr_cfn_cluster = aws_emr.CfnCluster(self, "MyEMRCluster",
      instances=emr_instances,
      # In order to use the default role for `job_flow_role`, you must have already created it using the CLI or console
      job_flow_role="EMR_EC2_DefaultRole",
      name=EMR_CLUSTER_NAME.value_as_string,
      # service_role="EMR_DefaultRole_V2",
      service_role="EMR_DefaultRole",
      applications=[
        aws_emr.CfnCluster.ApplicationProperty(name="Hadoop"),
        aws_emr.CfnCluster.ApplicationProperty(name="Hive"),
        aws_emr.CfnCluster.ApplicationProperty(name="JupyterHub"),
        aws_emr.CfnCluster.ApplicationProperty(name="Livy"),
        aws_emr.CfnCluster.ApplicationProperty(name="Spark"),
        aws_emr.CfnCluster.ApplicationProperty(name="JupyterEnterpriseGateway")
      ],
      bootstrap_actions=None,
      configurations=[
        aws_emr.CfnCluster.ConfigurationProperty(
          classification="hive-site",
          configuration_properties={
            "hive.metastore.client.factory.class": "com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory"
          }),
        aws_emr.CfnCluster.ConfigurationProperty(
          classification="spark-hive-site",
          configuration_properties={
            "hive.metastore.client.factory.class": "com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory"
          })
      ],
      ebs_root_volume_size=10,
      log_uri="s3n://aws-logs-{account}-{region}/elasticmapreduce/".format(account=cdk.Aws.ACCOUNT_ID, region=cdk.Aws.REGION),
      release_label="emr-6.5.0",
      scale_down_behavior="TERMINATE_AT_TASK_COMPLETION",
      # tags=[cdk.CfnTag(
      #   key="for-use-with-amazon-emr-managed-policies",
      #   value="true"
      # )],
      visible_to_all_users=True
    )
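
    #XXX: Illustrative sketch: submitting a Spark job to the cluster above as an EMR
    # step. The S3 path of the application is an assumption; replace it with a real
    # location before uncommenting.
    #
    # aws_emr.CfnStep(self, "MySparkStep",
    #   action_on_failure="CONTINUE",
    #   hadoop_jar_step=aws_emr.CfnStep.HadoopJarStepConfigProperty(
    #     jar="command-runner.jar",
    #     args=["spark-submit", "--deploy-mode", "cluster",
    #           "s3://my-bucket/spark-apps/my_spark_app.py"]
    #   ),
    #   job_flow_id=emr_cfn_cluster.ref,
    #   name="MySparkStep"
    # )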
Example #9
    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        EC2_KEY_PAIR_NAME = cdk.CfnParameter(
            self,
            'EC2KeyPairName',
            type='String',
            description='Amazon EC2 Instance KeyPair name')

        vpc_name = self.node.try_get_context("vpc_name")
        vpc = aws_ec2.Vpc.from_lookup(self,
                                      "ExistingVPC",
                                      is_default=True,
                                      vpc_name=vpc_name)

        # vpc = aws_ec2.Vpc(self, "JenkinsOnEC2Stack",
        #   max_azs=2,
        #   gateway_endpoints={
        #     "S3": aws_ec2.GatewayVpcEndpointOptions(
        #       service=aws_ec2.GatewayVpcEndpointAwsService.S3
        #     )
        #   }
        # )

        #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceClass.html
        #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceSize.html#aws_cdk.aws_ec2.InstanceSize
        ec2_instance_type = aws_ec2.InstanceType.of(
            aws_ec2.InstanceClass.BURSTABLE3, aws_ec2.InstanceSize.MEDIUM)

        sg_jenkins_host = aws_ec2.SecurityGroup(
            self,
            "JenkinsHostSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for a Jenkins host',
            security_group_name='jenkins-host-sg')
        cdk.Tags.of(sg_jenkins_host).add('Name', 'jenkins-host-sg')

        #TODO: SHOULD restrict the IP range allowed for SSH access
        sg_jenkins_host.add_ingress_rule(peer=aws_ec2.Peer.ipv4("0.0.0.0/0"),
                                         connection=aws_ec2.Port.tcp(22),
                                         description='SSH access')
        sg_jenkins_host.add_ingress_rule(peer=aws_ec2.Peer.ipv4("0.0.0.0/0"),
                                         connection=aws_ec2.Port.tcp(80),
                                         description='HTTP access')

        jenkins_host = aws_ec2.Instance(
            self,
            "JenkinsHost",
            vpc=vpc,
            instance_type=ec2_instance_type,
            machine_image=aws_ec2.MachineImage.latest_amazon_linux(
                generation=aws_ec2.AmazonLinuxGeneration.AMAZON_LINUX_2,
                edition=aws_ec2.AmazonLinuxEdition.STANDARD,
                kernel=aws_ec2.AmazonLinuxKernel.KERNEL5_X),
            vpc_subnets=aws_ec2.SubnetSelection(
                subnet_type=aws_ec2.SubnetType.PUBLIC),
            security_group=sg_jenkins_host,
            key_name=EC2_KEY_PAIR_NAME.value_as_string)

        # Script in S3 as Asset
        user_data_asset = aws_s3_assets.Asset(
            self,
            "JenkinsEC2UserData",
            path=os.path.join(os.path.dirname(__file__),
                              "user-data/install_jenkins.sh"))

        local_path = jenkins_host.user_data.add_s3_download_command(
            bucket=user_data_asset.bucket,
            bucket_key=user_data_asset.s3_object_key)

        # Userdata executes script from S3
        jenkins_host.user_data.add_execute_file_command(file_path=local_path)
        user_data_asset.grant_read(jenkins_host.role)

        cdk.CfnOutput(self,
                      'JenkinsHostId',
                      value=jenkins_host.instance_id,
                      export_name='JenkinsHostId')
        cdk.CfnOutput(self,
                      'JenkinsHostPublicDNSName',
                      value=jenkins_host.instance_public_dns_name,
                      export_name='JenkinsHostPublicDNSName')
Example #10
    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        OPENSEARCH_DOMAIN_NAME = cdk.CfnParameter(
            self,
            'OpenSearchDomainName',
            type='String',
            description='Amazon OpenSearch Service domain name',
            default='opensearch-{}'.format(''.join(
                random.sample((string.ascii_letters), k=5))),
            allowed_pattern='[a-z]+[A-Za-z0-9\-]+')

        EC2_KEY_PAIR_NAME = cdk.CfnParameter(
            self,
            'EC2KeyPairName',
            type='String',
            description='Amazon EC2 Instance KeyPair name')

        #XXX: To create this stack in an existing VPC,
        # uncomment the aws_ec2.Vpc.from_lookup(..) lines below,
        # comment out the aws_ec2.Vpc(..) construct, and then
        # pass -c vpc_name=your-existing-vpc to the cdk command,
        # for example,
        # cdk -c vpc_name=your-existing-vpc synth
        #
        # vpc_name = self.node.try_get_context('vpc_name')
        # vpc = aws_ec2.Vpc.from_lookup(self, 'ExistingVPC',
        #   is_default=True,
        #   vpc_name=vpc_name
        # )

        vpc = aws_ec2.Vpc(
            self,
            "OpenSearchVPC",
            max_azs=3,
            gateway_endpoints={
                "S3":
                aws_ec2.GatewayVpcEndpointOptions(
                    service=aws_ec2.GatewayVpcEndpointAwsService.S3)
            })

        #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceClass.html
        #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceSize.html#aws_cdk.aws_ec2.InstanceSize
        ec2_instance_type = aws_ec2.InstanceType.of(
            aws_ec2.InstanceClass.BURSTABLE3, aws_ec2.InstanceSize.MEDIUM)

        sg_bastion_host = aws_ec2.SecurityGroup(
            self,
            "BastionHostSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for a bastion host',
            security_group_name='bastion-host-sg')
        cdk.Tags.of(sg_bastion_host).add('Name', 'bastion-host-sg')

        #TODO: SHOULD restrict the IP range allowed for SSH access
        sg_bastion_host.add_ingress_rule(peer=aws_ec2.Peer.ipv4("0.0.0.0/0"),
                                         connection=aws_ec2.Port.tcp(22),
                                         description='SSH access')

        bastion_host = aws_ec2.Instance(
            self,
            "BastionHost",
            vpc=vpc,
            instance_type=ec2_instance_type,
            machine_image=aws_ec2.MachineImage.latest_amazon_linux(),
            vpc_subnets=aws_ec2.SubnetSelection(
                subnet_type=aws_ec2.SubnetType.PUBLIC),
            security_group=sg_bastion_host,
            key_name=EC2_KEY_PAIR_NAME.value_as_string)

        sg_use_opensearch = aws_ec2.SecurityGroup(
            self,
            "OpenSearchClientSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for an opensearch client',
            security_group_name='use-opensearch-cluster-sg')
        cdk.Tags.of(sg_use_opensearch).add('Name', 'use-opensearch-cluster-sg')

        sg_opensearch_cluster = aws_ec2.SecurityGroup(
            self,
            "OpenSearchSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for an opensearch cluster',
            security_group_name='opensearch-cluster-sg')
        cdk.Tags.of(sg_opensearch_cluster).add('Name', 'opensearch-cluster-sg')

        sg_opensearch_cluster.add_ingress_rule(
            peer=sg_opensearch_cluster,
            connection=aws_ec2.Port.all_tcp(),
            description='opensearch-cluster-sg')

        sg_opensearch_cluster.add_ingress_rule(
            peer=sg_use_opensearch,
            connection=aws_ec2.Port.tcp(443),
            description='use-opensearch-cluster-sg')
        sg_opensearch_cluster.add_ingress_rule(
            peer=sg_use_opensearch,
            connection=aws_ec2.Port.tcp_range(9200, 9300),
            description='use-opensearch-cluster-sg')

        sg_opensearch_cluster.add_ingress_rule(
            peer=sg_bastion_host,
            connection=aws_ec2.Port.tcp(443),
            description='bastion-host-sg')
        sg_opensearch_cluster.add_ingress_rule(
            peer=sg_bastion_host,
            connection=aws_ec2.Port.tcp_range(9200, 9300),
            description='bastion-host-sg')

        master_user_secret = aws_secretsmanager.Secret(
            self,
            "OpenSearchMasterUserSecret",
            generate_secret_string=aws_secretsmanager.SecretStringGenerator(
                secret_string_template=json.dumps({"username": "******"}),
                generate_string_key="password",
                # Master password must be at least 8 characters long and contain at least one uppercase letter,
                # one lowercase letter, one number, and one special character.
                password_length=8))
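        # Note (assumption, not in the original example): password_length=8 alone does
        # not guarantee the uppercase/lowercase/number/special-character mix required
        # above; SecretStringGenerator also accepts require_each_included_type, e.g.
        #
        # generate_secret_string=aws_secretsmanager.SecretStringGenerator(
        #     secret_string_template=json.dumps({"username": "******"}),
        #     generate_string_key="password",
        #     password_length=8,
        #     require_each_included_type=True)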

        #XXX: aws cdk elasticsearch example - https://github.com/aws/aws-cdk/issues/2873
        # You should camelCase the property names instead of PascalCase
        opensearch_domain = aws_opensearchservice.Domain(
            self,
            "OpenSearch",
            domain_name=OPENSEARCH_DOMAIN_NAME.value_as_string,
            version=aws_opensearchservice.EngineVersion.OPENSEARCH_1_0,
            capacity={
                "master_nodes": 3,
                "master_node_instance_type": "r6g.large.search",
                "data_nodes": 3,
                "data_node_instance_type": "r6g.large.search"
            },
            ebs={
                "volume_size": 10,
                "volume_type": aws_ec2.EbsDeviceVolumeType.GP2
            },
            #XXX: az_count must be equal to vpc subnets count.
            zone_awareness={"availability_zone_count": 3},
            logging={
                "slow_search_log_enabled": True,
                "app_log_enabled": True,
                "slow_index_log_enabled": True
            },
            fine_grained_access_control=aws_opensearchservice.
            AdvancedSecurityOptions(
                master_user_name=master_user_secret.secret_value_from_json(
                    "username").to_string(),
                master_user_password=master_user_secret.secret_value_from_json(
                    "password")),
            # Enforce HTTPS is required when fine-grained access control is enabled.
            enforce_https=True,
            # Node-to-node encryption is required when fine-grained access control is enabled
            node_to_node_encryption=True,
            # Encryption-at-rest is required when fine-grained access control is enabled.
            encryption_at_rest={"enabled": True},
            use_unsigned_basic_auth=True,
            security_groups=[sg_opensearch_cluster],
            automated_snapshot_start_hour=17,  # 2 AM (GMT+9)
            vpc=vpc,
            vpc_subnets=[
                aws_ec2.SubnetSelection(
                    one_per_az=True,
                    subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT)
            ],
            removal_policy=cdk.RemovalPolicy.
            DESTROY  # default: cdk.RemovalPolicy.RETAIN
        )
        cdk.Tags.of(opensearch_domain).add(
            'Name', OPENSEARCH_DOMAIN_NAME.value_as_string)

        cdk.CfnOutput(self,
                      'BastionHostId',
                      value=bastion_host.instance_id,
                      export_name='BastionHostId')
        cdk.CfnOutput(self,
                      'OpenSearchDomainEndpoint',
                      value=opensearch_domain.domain_endpoint,
                      export_name='OpenSearchDomainEndpoint')
        cdk.CfnOutput(
            self,
            'OpenSearchDashboardsURL',
            value=f"{opensearch_domain.domain_endpoint}/_dashboards/",
            export_name='OpenSearchDashboardsURL')
        cdk.CfnOutput(self,
                      'MasterUserSecretId',
                      value=master_user_secret.secret_name,
                      export_name='MasterUserSecretId')
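        # Usage sketch (assumption, not part of the original stack): after deployment,
        # the master user credentials can be read back from Secrets Manager and the
        # domain queried over HTTPS with basic auth from inside the VPC (e.g. via the
        # bastion host). The placeholders refer to the stack outputs defined above.
        #
        # import json
        # import boto3
        # import requests
        #
        # sm = boto3.client('secretsmanager')
        # secret = sm.get_secret_value(SecretId='<MasterUserSecretId output>')
        # creds = json.loads(secret['SecretString'])
        # resp = requests.get(
        #     'https://<OpenSearchDomainEndpoint output>/_cluster/health',
        #     auth=(creds['username'], creds['password']))
        # print(resp.json())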
Example #11
0
    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        vpc_name = self.node.try_get_context('vpc_name')
        vpc = aws_ec2.Vpc.from_lookup(self,
                                      'ExistingVPC',
                                      is_default=True,
                                      vpc_name=vpc_name)

        #XXX: create new vpc for msk cluster
        # vpc = aws_ec2.Vpc(self, 'VpcStack',
        #   max_azs=3,
        #   gateway_endpoints={
        #     "S3": aws_ec2.GatewayVpcEndpointOptions(
        #       service=aws_ec2.GatewayVpcEndpointAwsService.S3
        #     )
        #   }
        # )

        MSK_CLUSTER_NAME = cdk.CfnParameter(
            self,
            'KafkaClusterName',
            type='String',
            description='Managed Streaming for Apache Kafka cluster name',
            default='MSK-{}'.format(''.join(
                random.sample((string.ascii_letters), k=5))),
            allowed_pattern='[A-Za-z0-9-]+')

        KAFKA_VERSION = cdk.CfnParameter(
            self,
            'KafkaVersion',
            type='String',
            description='Apache Kafka version',
            default='2.6.2',
            # Supported Apache Kafka versions
            # https://docs.aws.amazon.com/msk/latest/developerguide/supported-kafka-versions.html
            allowed_values=[
                '2.8.1', '2.8.0', '2.7.1', '2.6.2', '2.6.1', '2.6.0', '2.5.1',
                '2.4.1.1', '2.3.1', '2.2.1'
            ])

        #XXX: change broker instance type
        KAFKA_BROKER_INSTANCE_TYPE = cdk.CfnParameter(
            self,
            'KafkaBrokerInstanceType',
            type='String',
            description='Apache Kafka Broker instance type',
            default='kafka.m5.large')

        #XXX: change volume size
        KAFKA_BROKER_EBS_VOLUME_SIZE = cdk.CfnParameter(
            self,
            'KafkaBrokerEBSVolumeSize',
            type='Number',
            description=
            'Apache Kafka Broker EBS Volume size (Minimum: 1 GiB, Maximum: 16384 GiB)',
            default='100',
            min_value=1,
            max_value=16384)

        MSK_CLIENT_SG_NAME = 'use-msk-sg-{}'.format(''.join(
            random.sample((string.ascii_lowercase), k=5)))
        sg_use_msk = aws_ec2.SecurityGroup(
            self,
            'KafkaClientSecurityGroup',
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for Amazon MSK client',
            security_group_name=MSK_CLIENT_SG_NAME)
        cdk.Tags.of(sg_use_msk).add('Name', MSK_CLIENT_SG_NAME)

        MSK_CLUSTER_SG_NAME = 'msk-sg-{}'.format(''.join(
            random.sample((string.ascii_lowercase), k=5)))
        sg_msk_cluster = aws_ec2.SecurityGroup(
            self,
            'MSKSecurityGroup',
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for Amazon MSK Cluster',
            security_group_name=MSK_CLUSTER_SG_NAME)
        sg_msk_cluster.add_ingress_rule(
            peer=sg_use_msk,
            connection=aws_ec2.Port.tcp(2181),
            description='msk client security group')
        sg_msk_cluster.add_ingress_rule(
            peer=sg_use_msk,
            connection=aws_ec2.Port.tcp(9092),
            description='msk client security group')
        sg_msk_cluster.add_ingress_rule(
            peer=sg_use_msk,
            connection=aws_ec2.Port.tcp(9094),
            description='msk client security group')
        cdk.Tags.of(sg_msk_cluster).add('Name', MSK_CLUSTER_SG_NAME)

        msk_broker_ebs_storage_info = aws_msk.CfnCluster.EBSStorageInfoProperty(
            volume_size=KAFKA_BROKER_EBS_VOLUME_SIZE.value_as_number)

        msk_broker_storage_info = aws_msk.CfnCluster.StorageInfoProperty(
            ebs_storage_info=msk_broker_ebs_storage_info)

        msk_broker_node_group_info = aws_msk.CfnCluster.BrokerNodeGroupInfoProperty(
            client_subnets=vpc.select_subnets(
                subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT).subnet_ids,
            instance_type=KAFKA_BROKER_INSTANCE_TYPE.value_as_string,
            security_groups=[
                sg_use_msk.security_group_id, sg_msk_cluster.security_group_id
            ],
            storage_info=msk_broker_storage_info)

        msk_encryption_info = aws_msk.CfnCluster.EncryptionInfoProperty(
            encryption_in_transit=aws_msk.CfnCluster.
            EncryptionInTransitProperty(client_broker='TLS_PLAINTEXT',
                                        in_cluster=True))
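        #XXX: client_broker accepts 'TLS', 'TLS_PLAINTEXT', or 'PLAINTEXT';
        # 'TLS_PLAINTEXT' keeps both the plaintext (9092) and TLS (9094) listeners
        # open, matching the ingress rules defined above.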

        msk_cluster = aws_msk.CfnCluster(
            self,
            'AWSKafkaCluster',
            broker_node_group_info=msk_broker_node_group_info,
            cluster_name=MSK_CLUSTER_NAME.value_as_string,
            #XXX: Supported Apache Kafka versions
            # https://docs.aws.amazon.com/msk/latest/developerguide/supported-kafka-versions.html
            kafka_version=KAFKA_VERSION.value_as_string,
            number_of_broker_nodes=3,
            encryption_info=msk_encryption_info,
            enhanced_monitoring='PER_TOPIC_PER_BROKER')
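        # Usage sketch (assumption, not part of the original stack): once the cluster
        # is ACTIVE, the broker and ZooKeeper connection strings can be looked up with
        # boto3 using the MSKClusterArn output defined at the end of this stack, e.g.
        #
        # import boto3
        # kafka = boto3.client('kafka')
        # brokers = kafka.get_bootstrap_brokers(ClusterArn='<MSKClusterArn output>')
        # cluster = kafka.describe_cluster(ClusterArn='<MSKClusterArn output>')
        # print(brokers['BootstrapBrokerString'],
        #       cluster['ClusterInfo']['ZookeeperConnectString'])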

        amzn_linux = aws_ec2.MachineImage.latest_amazon_linux(
            generation=aws_ec2.AmazonLinuxGeneration.AMAZON_LINUX_2,
            edition=aws_ec2.AmazonLinuxEdition.STANDARD,
            virtualization=aws_ec2.AmazonLinuxVirt.HVM,
            storage=aws_ec2.AmazonLinuxStorage.GENERAL_PURPOSE,
            cpu_type=aws_ec2.AmazonLinuxCpuType.X86_64)

        KAFKA_CLIENT_EC2_SG_NAME = 'kafka-client-ec2-sg-{}'.format(''.join(
            random.sample((string.ascii_lowercase), k=5)))
        sg_kafka_client_ec2_instance = aws_ec2.SecurityGroup(
            self,
            'KafkaClientEC2InstanceSG',
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for Kafka Client EC2 Instance',
            security_group_name=KAFKA_CLIENT_EC2_SG_NAME)
        cdk.Tags.of(sg_kafka_client_ec2_instance).add(
            'Name', KAFKA_CLIENT_EC2_SG_NAME)
        sg_kafka_client_ec2_instance.add_ingress_rule(
            peer=aws_ec2.Peer.ipv4("0.0.0.0/0"),
            connection=aws_ec2.Port.tcp(22))

        kafka_client_ec2_instance_role = aws_iam.Role(
            self,
            'KafkaClientEC2InstanceRole',
            role_name='{}-KafkaClientEC2InstanceRole'.format(self.stack_name),
            assumed_by=aws_iam.ServicePrincipal('ec2.amazonaws.com'),
            managed_policies=[
                aws_iam.ManagedPolicy.from_aws_managed_policy_name(
                    'AmazonSSMManagedInstanceCore'),
                aws_iam.ManagedPolicy.from_aws_managed_policy_name(
                    'AmazonMSKReadOnlyAccess')
            ])

        msk_client_ec2_instance = aws_ec2.Instance(
            self,
            'KafkaClientEC2Instance',
            instance_type=aws_ec2.InstanceType.of(
                instance_class=aws_ec2.InstanceClass.BURSTABLE2,
                instance_size=aws_ec2.InstanceSize.MICRO),
            machine_image=amzn_linux,
            vpc=vpc,
            availability_zone=vpc.select_subnets(
                subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT).
            availability_zones[0],
            instance_name='KafkaClientInstance',
            role=kafka_client_ec2_instance_role,
            security_group=sg_kafka_client_ec2_instance,
            vpc_subnets=aws_ec2.SubnetSelection(
                subnet_type=aws_ec2.SubnetType.PUBLIC))
        msk_client_ec2_instance.add_security_group(sg_use_msk)

        commands = '''
yum update -y 
yum install python3.7 -y
yum install java-1.8.0-openjdk-devel -y

cd /home/ec2-user
echo "export PATH=.local/bin:$PATH" >> .bash_profile

mkdir -p opt
cd opt
wget https://archive.apache.org/dist/kafka/2.2.1/kafka_2.12-2.2.1.tgz
tar -xzf kafka_2.12-2.2.1.tgz
ln -nsf kafka_2.12-2.2.1 kafka

cd /home/ec2-user
wget https://bootstrap.pypa.io/get-pip.py
su -c "python3.7 get-pip.py --user" -s /bin/sh ec2-user
su -c "/home/ec2-user/.local/bin/pip3 install boto3 --user" -s /bin/sh ec2-user

chown -R ec2-user ./opt
chgrp -R ec2-user ./opt
'''

        msk_client_ec2_instance.user_data.add_commands(commands)
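        # Usage sketch (assumption, not part of the original user data): from the
        # client instance, a test topic can be created with the Kafka CLI installed
        # above, using the cluster's ZooKeeper connection string, e.g.
        #
        #   /home/ec2-user/opt/kafka/bin/kafka-topics.sh --create \
        #     --zookeeper <ZookeeperConnectString> \
        #     --replication-factor 3 --partitions 1 --topic test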

        cdk.CfnOutput(self,
                      'StackName',
                      value=self.stack_name,
                      export_name='StackName')
        cdk.CfnOutput(self, 'VpcId', value=vpc.vpc_id, export_name='VpcId')

        cdk.CfnOutput(self,
                      'MSKSecurityGroupID',
                      value=sg_msk_cluster.security_group_id,
                      export_name='MSKSecurityGroupID')
        cdk.CfnOutput(self,
                      'KafkaClientSecurityGroupID',
                      value=sg_use_msk.security_group_id,
                      export_name='KafkaClientSecurityGroupID')
        cdk.CfnOutput(self,
                      'MSKClusterArn',
                      value=msk_cluster.ref,
                      export_name='MSKClusterArn')

        cdk.CfnOutput(self,
                      'KafkaClientEC2InstancePublicDNS',
                      value=msk_client_ec2_instance.instance_public_dns_name,
                      export_name='KafkaClientEC2InstancePublicDNS')