Example #1
    def __init__(self, app: core.App, id: str, **kwargs) -> None:
        super().__init__(app, id, **kwargs)

        role = iam.Role(
            scope=self,
            id="SageMakerDemo1",
            role_name="SageMakerDemo1",
            assumed_by=iam.ServicePrincipal('sagemaker.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'AmazonSageMakerFullAccess'),
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'IAMReadOnlyAccess')
            ])

        instance = sagemaker.CfnNotebookInstance(
            scope=self,
            id="My instance",
            instance_type="ml.t2.large",
            default_code_repository=
            "https://github.com/cobusbernard/sagemaker-notebooks.git",
            role_arn=role.role_arn)

        bucket = s3.Bucket(scope=self,
                           id="sagemaker-demo-cobus",
                           bucket_name="sagemaker-demo-cobus")
Example #2
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # enter your code here
        instance = sagemaker.CfnNotebookInstance(
            scope=self,
            id="My instance",
            instance_type="ml.t2.medium",
            role_arn=self.node.try_get_context("role-arn"),
            default_code_repository=
            "https://git-codecommit.us-east-1.amazonaws.com/v1/repos/AmazonSageMaker-LinearClassificationRepo",
        )
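The role ARN above is read from CDK context rather than created in the stack; a usage sketch for supplying it (the "role-arn" key comes from the snippet, the ARN value is a placeholder): it can be passed as -c role-arn=... on the cdk command line, set under the "context" key in cdk.json, or provided programmatically when the app is created:

app = core.App(
    context={"role-arn": "arn:aws:iam::123456789012:role/NotebookExecutionRole"})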
Example #3
    def __init__(
        self,
        scope: core.Construct,
        construct_id: str,
        vpc: ec2.Vpc,
        role: iam_.Role,
        subnet: ec2.PrivateSubnet,
        **kwargs
    ) -> None:
        super().__init__(scope, construct_id, **kwargs)

        self.security_group = ec2.SecurityGroup(
            self,
            "WorkshopGroup",
            vpc=vpc
        )

        # Create EFS inside VPC
        # self.efs = efs.FileSystem(
        #     self,
        #     "commonEFS4Notebooks",
        #     vpc = vpc,
        #     encrypted=True,
        #     enable_automatic_backups=True,
        #     performance_mode=efs.PerformanceMode('MAX_IO'),
        #     throughput_mode=efs.ThroughputMode('BURSTING'),
        #     security_group = self.security_group
        # )

        # lifecycleconfig = sm.CfnNotebookInstanceLifecycleConfig(
        #     self,
        #     "LifeCycleConfig",
        #     notebook_instance_lifecycle_config_name="LifeCycleConfig",
        #     on_create=None,
        #     on_start=code
        # )

        instance_id = "Workshop"
        sm.CfnNotebookInstance(
            self,
            instance_id,
            instance_type='ml.t2.medium',
            volume_size_in_gb=20,
            security_group_ids=[self.security_group.security_group_id],
            subnet_id=subnet.subnet_id,
            notebook_instance_name=instance_id,
            role_arn=role.role_arn,
            direct_internet_access='Enabled',
            root_access='Enabled',
            default_code_repository="https://github.com/muthukumaranR/workshop_notebooks"
        )
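This stack expects an already-built VPC, IAM role and private subnet to be passed in; a hedged sketch of how a surrounding app might produce those arguments (the NetworkStack scope and all names below are illustrative, and the snippet's own stack class is referenced only in a comment):

from aws_cdk import core
from aws_cdk import aws_ec2 as ec2
from aws_cdk import aws_iam as iam_

app = core.App()
network_stack = core.Stack(app, "NetworkStack")
vpc = ec2.Vpc(network_stack, "WorkshopVpc", max_azs=2)
role = iam_.Role(network_stack, "NotebookRole",
                 assumed_by=iam_.ServicePrincipal("sagemaker.amazonaws.com"))
subnet = vpc.private_subnets[0]  # an ISubnet; the stack above annotates it as ec2.PrivateSubnet
# WorkshopNotebookStack(app, "WorkshopNotebook", vpc=vpc, role=role, subnet=subnet)
app.synth()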
Example #4
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        bucket1 = s3.Bucket(self,
    		"BucketCDK", 
    		versioned=True,
    		bucket_name='cdk-sagemaker-bucket-s843971001',
            removal_policy=core.RemovalPolicy.DESTROY)

        sm_notebook = sagemaker.CfnNotebookInstance(self,
        "SageMakerNotebookInstance",
         instance_type='ml.m4.xlarge',
         role_arn='arn:aws:iam::304472691870:role/aws-sagemaker-role-s843971',
         notebook_instance_name='cdk-sagemaker-notebook-s843971',
         default_code_repository='https://github.com/abdul-pfg/sagemaker-iris')
Example #5
    def __init__(self, scope: core.Construct, construct_id: str, vpc: ec2.Vpc,
                 role: iam_.Role, subnet: ec2.PrivateSubnet, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        self.security_group = ec2.SecurityGroup(self, "WorkshopGroup", vpc=vpc)

        instance_id = "Workshop"
        sm.CfnNotebookInstance(
            self,
            instance_id,
            instance_type='ml.t2.medium',
            volume_size_in_gb=20,
            security_group_ids=[self.security_group.security_group_id],
            subnet_id=subnet.subnet_id,
            notebook_instance_name=instance_id,
            role_arn=role.role_arn,
            direct_internet_access='Enabled',
            root_access='Enabled',
            default_code_repository=
            "https://github.com/NASA-IMPACT/workshop_notebooks")
Example #6
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # Create role for Notebook instance
        nrole = iam_.Role(self,
                          "notebookAccessRole",
                          assumed_by=iam_.ServicePrincipal("sagemaker"))

        nrole.add_managed_policy(
            iam_.ManagedPolicy.from_aws_managed_policy_name(
                'AmazonSageMakerFullAccess'))
        nrole.add_managed_policy(
            iam_.ManagedPolicy.from_aws_managed_policy_name(
                'AmazonEC2ReadOnlyAccess'))
        notebook_uuid = str(uuid.uuid4())
        notebook_uuid = str(notebook_uuid[0:notebook_uuid.find('-')])
        notebook_instance_id = 'spot-history-notebook-' + notebook_uuid

        notebook_instance = sagemaker_.CfnNotebookInstance(
            self,
            notebook_instance_id,
            instance_type='ml.m5.xlarge',
            volume_size_in_gb=10,
            security_group_ids=default_sg,
            subnet_id=default_subnet,
            notebook_instance_name=notebook_instance_id,
            role_arn=nrole.role_arn,
            default_code_repository=github_repo,
        )

        notebook_url = "https://{}.console.aws.amazon.com/sagemaker/home?region={}#/notebook-instances/openNotebook/{}?view=classic".format(
            my_region, my_region, notebook_instance.notebook_instance_name)

        core.CfnOutput(
            self,
            "Notebook Name",
            value=notebook_url,
            description="Notebook Instance Name",
        )
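The snippet relies on several module-level values (default_sg, default_subnet, github_repo, my_region) and uses uuid without showing their definitions; a hedged sketch of what they might look like (every value below is a placeholder, not taken from the original project):

import uuid

my_region = 'us-east-1'
default_sg = ['sg-0123456789abcdef0']        # security group IDs attached to the notebook's ENI
default_subnet = 'subnet-0123456789abcdef0'  # subnet the notebook instance is placed in
github_repo = 'https://github.com/example-org/example-notebooks.git'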
Example #7
    def __init__(self, scope: core.Construct, id: str, users,
                 **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        nb_role = iam.Role(
            self,
            id,
            assumed_by=iam.ServicePrincipal('sagemaker.amazonaws.com'))
        # XXX: add ro access to some S3 bucket

        for user_data in users:
            user = user_data['email']
            iam_user = iam.User(self,
                                user,
                                user_name=user,
                                password=core.SecretValue.plain_text(
                                    user_data['password']))

            nb = sagemaker.CfnNotebookInstance(
                self,
                'nb_%s' % user,
                instance_type='ml.t2.medium',
                role_arn=nb_role.role_arn,
                notebook_instance_name='nb-%s-%s' %
                (id, user.translate(str.maketrans('@.', '--'))))
            nb_policy = iam.Policy(
                self,
                'nb_policy_%s' % user,
                statements=[
                    iam.PolicyStatement(actions=[
                        'sagemaker:CreatePresignedNotebookInstanceUrl'
                    ],
                                        resources=[nb.ref]),
                    iam.PolicyStatement(
                        actions=['sagemaker:ListNotebookInstances'],
                        resources=["*"])
                ])
            iam_user.attach_inline_policy(nb_policy)
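The users argument is iterated as a list of dicts with 'email' and 'password' keys; a minimal illustrative payload (values are placeholders):

users = [
    {"email": "alice@example.com", "password": "ChangeMe-123"},
    {"email": "bob@example.com", "password": "ChangeMe-456"},
]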
Example #8
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        bucket1 = s3.Bucket(self,
    		"BucketCDK", 
    		versioned=True,
    		bucket_name='minotaur-data-bucket',
            removal_policy=core.RemovalPolicy.DESTROY)
        
        bucket2 = s3.Bucket(self,
    		"BucketCDK1", 
    		versioned=True,
    		bucket_name='minotaur-project-files-bucket',
            removal_policy=core.RemovalPolicy.DESTROY)

        role_Arn = core.CfnParameter(self, "roleArn", type="String",
        description="Sagemaker role for IAM user")
        
        sm_notebook = sagemaker.CfnNotebookInstance(self,
        "SageMakerNotebookInstance",
         instance_type='ml.t2.medium',
         role_arn="role_Arn",
         notebook_instance_name='minotaur-notebook')
Example #9
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # Note: typo of role name is copied from original workshop
        mysfits_notebook_role = aws_iam.Role(
            self,
            "MysfitsNotbookRole",
            assumed_by=aws_iam.ServicePrincipal("sagemaker.amazonaws.com"),
        )

        mysfits_notebook_policy = aws_iam.PolicyStatement()
        mysfits_notebook_policy.add_actions(
            "sagemaker:*",
            "ecr:GetAuthorizationToken",
            "ecr:GetDownloadUrlForLayer",
            "ecr:BatchGetImage",
            "ecr:BatchCheckLayerAvailability",
            "cloudwatch:PutMetricData",
            "logs:CreateLogGroup",
            "logs:CreateLogStream",
            "logs:DescribeLogStreams",
            "logs:PutLogEvents",
            "logs:GetLogEvents",
            "s3:CreateBucket",
            "s3:ListBucket",
            "s3:GetBucketLocation",
            "s3:GetObject",
            "s3:PutObject",
            "s3:DeleteObject",
        )
        mysfits_notebook_policy.add_all_resources()

        mysfits_notebook_pass_role_policy = aws_iam.PolicyStatement()
        mysfits_notebook_pass_role_policy.add_actions("iam:PassRole")
        mysfits_notebook_pass_role_policy.add_all_resources()
        mysfits_notebook_pass_role_policy.add_condition(
            "StringEquals", {"iam:PassedToService": "sagemaker.amazonaws.com"})

        aws_iam.Policy(
            self,
            "MysfitsNotebookPolicy",
            statements=[
                mysfits_notebook_pass_role_policy, mysfits_notebook_policy
            ],
            roles=[mysfits_notebook_role],
        )

        notebook_instance = aws_sagemaker.CfnNotebookInstance(
            self,
            "MythicalMysfits-SageMaker-Notebook",
            instance_type="ml.t2.medium",
            role_arn=mysfits_notebook_role.role_arn,
        )

        lambda_repository = aws_codecommit.Repository(
            self,
            "RecommendationsLambdaRepository",
            repository_name="MythicalMysfits-RecommendationsLambdaRepository",
        )

        core.CfnOutput(
            self,
            "recommandationsRepositoryCloneUrlHttp",
            value=lambda_repository.repository_clone_url_http,
            description="Recommendations Lambda Repository Clone Url HTTP",
        )

        core.CfnOutput(
            self,
            "recommandationsRepositoryCloneUrlSsh",
            value=lambda_repository.repository_clone_url_ssh,
            description="Recommendations Lambda Repository Clone Url SSH",
        )

        recommendations_lambda_function_policy_statement = aws_iam.PolicyStatement(
        )
        recommendations_lambda_function_policy_statement.add_actions(
            "sagemaker:InvokeEndpoint")
        recommendations_lambda_function_policy_statement.add_all_resources()

        mysfits_recommendations = aws_lambda.Function(
            self,
            "Function",
            handler="recommendations.recommend",
            runtime=aws_lambda.Runtime.PYTHON_3_6,
            description="A microservice backend to a SageMaker endpoint",
            memory_size=128,
            code=aws_lambda.Code.asset(
                os.path.join("..", "..", "lambda-recommendations/service")),
            timeout=core.Duration.seconds(30),
            initial_policy=[recommendations_lambda_function_policy_statement],
        )

        questions_api_role = aws_iam.Role(
            self,
            "QuestionsApiRole",
            assumed_by=aws_iam.ServicePrincipal("apigateway.amazonaws.com"),
        )
        api_policy = aws_iam.PolicyStatement()
        api_policy.add_actions("lambda:InvokeFunction")
        api_policy.add_resources(mysfits_recommendations.function_arn)
        aws_iam.Policy(
            self,
            "QuestionsApiPolicy",
            policy_name="questions_api_policy",
            statements=[api_policy],
            roles=[questions_api_role],
        )

        questions_integration = aws_apigateway.LambdaIntegration(
            mysfits_recommendations,
            credentials_role=questions_api_role,
            integration_responses=[
                aws_apigateway.IntegrationResponse(
                    status_code="200",
                    response_templates={
                        "application/json": '{"status": "OK"}'
                    },
                )
            ],
        )

        api = aws_apigateway.LambdaRestApi(
            self,
            "APIEndpoint",
            handler=mysfits_recommendations,
            rest_api_name="Recommendation API Service",
            proxy=False,
        )

        recommendations_method = api.root.add_resource("recommendations")
        recommendations_method.add_method(
            "POST",
            questions_integration,
            method_responses=[
                aws_apigateway.MethodResponse(
                    status_code="200",
                    response_parameters={
                        "method.response.header.Access-Control-Allow-Headers":
                        True,
                        "method.response.header.Access-Control-Allow-Methods":
                        True,
                        "method.response.header.Access-Control-Allow-Origin":
                        True,
                    },
                )
            ],
            authorization_type=aws_apigateway.AuthorizationType.NONE,
        )

        recommendations_method.add_method(
            "OPTIONS",
            aws_apigateway.MockIntegration(
                integration_responses=[
                    aws_apigateway.IntegrationResponse(
                        status_code="200",
                        response_parameters={
                            "method.response.header.Access-Control-Allow-Headers":
                            "'Content-Type,X-Amz-Date,Authorization,X-Api-Key,X-Amz-Security-Token,X-Amz-User-Agent'",
                            "method.response.header.Access-Control-Allow-Origin":
                            "'*'",
                            "method.response.header.Access-Control-Allow-Credentials":
                            "'false'",
                            "method.response.header.Access-Control-Allow-Methods":
                            "'OPTIONS,GET,PUT,POST,DELETE'",
                        },
                    )
                ],
                passthrough_behavior=aws_apigateway.PassthroughBehavior.NEVER,
                request_templates={"application/json": '{"statusCode": 200}'},
            ),
            method_responses=[
                aws_apigateway.MethodResponse(
                    status_code="200",
                    response_parameters={
                        "method.response.header.Access-Control-Allow-Headers":
                        True,
                        "method.response.header.Access-Control-Allow-Methods":
                        True,
                        "method.response.header.Access-Control-Allow-Credentials":
                        True,
                        "method.response.header.Access-Control-Allow-Origin":
                        True,
                    },
                )
            ],
        )
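Once deployed, the recommendations resource accepts a plain HTTPS POST through the default 'prod' stage of the LambdaRestApi; a usage sketch (the API id, region and request body are placeholders for whatever the recommendations Lambda actually expects):

import requests  # assumes the requests package is installed

api_url = "https://abc123def4.execute-api.us-east-1.amazonaws.com/prod/recommendations"
response = requests.post(api_url, json={"responses": [1, 2, 3, 4, 5]})
print(response.status_code, response.text)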
Example #10
    def __init__(
        self, 
        scope: core.Construct, 
        id: str, 
        vpc: ec2.Vpc,
        lambda_sg: ec2.SecurityGroup,
        **kwargs
    ) -> None:
        super().__init__(scope, id, **kwargs)

        self.PREFIX = id

        ## **************** Create Knowledge Analyzer Service Role **************** 
        self.service_role = aws_iam.Role.from_role_arn(
            self, f'{self.PREFIX}-IAMROLE-ServiceRole',
            f"arn:aws:iam::{self.account}:role/HEALTHLAKE-KNOWLEDGE-ANALYZER-IAMROLE-ServiceRole"
        )

        ## **************** Create a notebook Instance ****************

        self.notebook_instance_role = aws_iam.Role(
            self,
            "AmazonSageMaker-ExecutionRole-20210318",
            role_name=f'AmazonSageMaker-ExecutionRole-20210318',
            assumed_by=aws_iam.ServicePrincipal("sagemaker.amazonaws.com")
        )

        roleStmt1=aws_iam.PolicyStatement(
                effect=aws_iam.Effect.ALLOW,
                resources=["arn:aws:s3:::*"],
                actions=["s3:PutObject", "s3:ListObjects", "s3:GetObject"]
            )
        roleStmt2=aws_iam.PolicyStatement(
                effect=aws_iam.Effect.ALLOW,
                resources=["arn:aws:s3:::*"],
                actions=["s3:ListBucket", "s3:GetBucketPublicAccessBlock", "s3:GetEncryptionConfiguration"]
            )
        
        self.notebook_instance_role.add_to_policy( roleStmt1 )
        self.notebook_instance_role.add_to_policy( roleStmt2 )
        
        self.notebook_instance_role.add_managed_policy(aws_iam.ManagedPolicy.from_aws_managed_policy_name("AmazonSageMakerFullAccess"))

        
        self.nbInstance = sagemaker.CfnNotebookInstance(
            self,
            f'{self.PREFIX}-HealtLake-Blog-Run',
            instance_type = 'ml.t2.medium',
            notebook_instance_name=f'{self.PREFIX}-HealtLake-Blog-Run',
            role_arn = self.notebook_instance_role.role_arn,
            )


        ## **************** Inherit VPC & Security Group **************** 
        self.vpc = vpc
        self.lambda_sg = lambda_sg


        self.comprehend_complete_sqs = sqs.CfnQueue(
            self, f'{self.PREFIX}-comprehendCompleteQueue',
            visibility_timeout = 900,
            queue_name= f'{self.PREFIX}-comprehendCompleteQueue'
        )
        self.s3export = _s3.Bucket(self, "hl-synthea-export", bucket_name = "hl-synthea-export-%s" % (core.Aws.ACCOUNT_ID), block_public_access=_s3.BlockPublicAccess.BLOCK_ALL, encryption=_s3.BucketEncryption.S3_MANAGED, removal_policy = core.RemovalPolicy.DESTROY, auto_delete_objects= True,)
        self.s3_loc = _s3.Bucket(self, "hl-synthea-source", bucket_name = "hl-synthea-source-%s" % (core.Aws.ACCOUNT_ID), block_public_access=_s3.BlockPublicAccess.BLOCK_ALL, removal_policy = core.RemovalPolicy.DESTROY, auto_delete_objects= True,)
        
        self.kendra_instance_role = aws_iam.CfnRole(
            self,
            f'{self.PREFIX}-Kendra-ServiceRole',
            role_name=f'{self.PREFIX}-Kendra-ServiceRole',
            assume_role_policy_document=aws_iam.PolicyDocument(
                statements=[
                    aws_iam.PolicyStatement(
                        effect=aws_iam.Effect.ALLOW,
                        actions=[ "sts:AssumeRole" ],
                        principals=[ aws_iam.ServicePrincipal("kendra.amazonaws.com") ]
                    )
                ]
            ),
            policies=[
                aws_iam.CfnRole.PolicyProperty(
                    policy_document=aws_iam.PolicyDocument(
                        statements=[
                            aws_iam.PolicyStatement(
                                effect=aws_iam.Effect.ALLOW,
                                actions=[
                                    "s3:GetObject",
                                    "s3:ListBucket",
                                    "s3:GetBucketPublicAccessBlock",
                                    "s3:GetEncryptionConfiguration"                                    
                                ],
                                resources=["*"]
                            )
                        ]
                    ),
                    policy_name="KendraAllowS3GetListObject"
                ),
                aws_iam.CfnRole.PolicyProperty(
                    policy_document=aws_iam.PolicyDocument(
                        statements=[
                            aws_iam.PolicyStatement(
                                effect=aws_iam.Effect.ALLOW,
                                actions=[
                                    "cloudwatch:PutMetricData"
                                ],
                                resources=["*"],
                                conditions = {
                                    "StringEquals": {
                                        'cloudwatch:namespace': 'AWS/Kendra',
                                    },                                    
                                }
                            )
                        ]
                    ),
                    policy_name="KendraAllowMetricObject"
                ),                
                aws_iam.CfnRole.PolicyProperty(
                    policy_document=aws_iam.PolicyDocument(
                        statements=[
                            aws_iam.PolicyStatement(
                                effect=aws_iam.Effect.ALLOW,
                                actions=[
                                    'logs:DescribeLogGroups',
                                    'logs:CreateLogGroup',
                                ],
                                resources=[
                                    "arn:" + "aws" + ":logs:" + core.Aws.REGION + ":" + core.Aws.ACCOUNT_ID + ":log-group:" + "/aws/kendra/*"
                                ],
                            )
                        ]
                    ),
                    policy_name="KendraAllowLogObject"
                ),   
                aws_iam.CfnRole.PolicyProperty(
                    policy_document=aws_iam.PolicyDocument(
                        statements=[
                            aws_iam.PolicyStatement(
                                effect=aws_iam.Effect.ALLOW,
                                actions=[
                                    'logs:DescribeLogStreams',
                                    'logs:CreateLogStream',
                                    'logs:PutLogEvents',
                                ],
                                resources=[
                                    "arn:" + "aws" + ":logs:" + core.Aws.REGION + ":" + core.Aws.ACCOUNT_ID + ":log-group:" + "/aws/kendra/*:log-stream:*" 
                                ],
                            )
                        ]
                    ),
                    policy_name="KendraAllowLogStreamsObject"
                ), 
            ],
        )   

    
        self.indexKendra = kendra.CfnIndex(
            self, f'{self.PREFIX}-KendraIndex',
            edition = 'DEVELOPER_EDITION',
            name = f'{self.PREFIX}-HealthLakeNotes',
            role_arn = self.kendra_instance_role.attr_arn,
        )


        # --------
        # Kendra data source role
    
        self.kendra_data_source_instance_role = aws_iam.Role(
            self,
            f'{self.PREFIX}-KDSrc-ServiceRole', 
            role_name=f'{self.PREFIX}-KDSrc-ServiceRole',
            assumed_by=aws_iam.ServicePrincipal('kendra.amazonaws.com'))
    
    
        self.kendra_data_source_instance_role.add_to_policy(aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            actions=[
                'kendra:BatchPutDocument',
                'kendra:BatchDeleteDocument',
                ],
            resources=[self.indexKendra.attr_arn]
        ))
        


        self.s3_loc.grant_read(self.kendra_data_source_instance_role)

        # print(dir(kendra.CfnDataSource.DataSourceInclusionsExclusionsStringsProperty))
        
        self.datasourceKendra = kendra.CfnDataSource(
            self, f'{self.PREFIX}-Data-S3-HealthLake',
            name = f'{self.PREFIX}-Data-S3-HealthLake',
            index_id = self.indexKendra.attr_id,
            type = 'S3',
            data_source_configuration=kendra.CfnDataSource.DataSourceConfigurationProperty(
                s3_configuration=kendra.CfnDataSource.S3DataSourceConfigurationProperty(
                    bucket_name=self.s3_loc.bucket_name,
                    # inclusion_prefixes=kendra.CfnDataSource.DataSourceInclusionsExclusionsStringsProperty(
                    #     data_source_inclusions_exclusions_strings=["source/"]
                    # ),
                )
            ),
            role_arn = self.kendra_data_source_instance_role.role_arn,
        )
        
        self.datasourceKendra.add_override("Properties.DataSourceConfiguration.S3Configuration.InclusionPrefixes", ['source/'])
  


        ## **************** Create resources **************** 
        self.createLambdaFunctions()
        self.setLambdaTriggers()

        

        # QueuePolicy
        self.queue_policy = sqs.CfnQueuePolicy(self, "QueuePolicy", 
            queues = [self.comprehend_complete_sqs.ref],
            policy_document = {
                "Version" : "2008-10-17",
                "Id" : "__default_policy_ID",
                "Statement" : [{
                    "Sid" : "__owner_statement",
                    "Effect" : "Allow",
                    "Principal" : {
                        "AWS": "*"
                    },
                    "Action" : "SQS:SendMessage",
                    "Resource" : f'arn:aws:sqs:us-east-1:{core.Aws.ACCOUNT_ID}:{self.comprehend_complete_sqs.queue_name}', # self.comprehend_complete_sqs.ref
                    "Condition": {
                        "StringEquals": {
                        "aws:SourceAccount": f'{core.Aws.ACCOUNT_ID}'
                        },
                        "ArnLike": {
                        "aws:SourceArn": f'arn:aws:s3:*:*:{self.s3_loc.bucket_name}'
                        }
                    }
                }]                    
            }
        )
Example #11
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # CONTAINER_IMAGE = 'daskdev/dask:0.19.4'
        # if use_rapids:
        #   CONTAINER_IMAGE = 'rapidsai/rapidsai:latest'

        # if use_notebook:
        #   CONTAINER_IMAGE = 'daskdev/dask-notebook:latest'

        #TODO : Create ECR repository
        #Update: Not required since ecs.ContainerImage already creates and pushes using the same asset

        #ecr = aws_ecr.Repository(self, 'MyECR', repository_name='dask')
        # not needed if you use an asset like below:

        # build an image asset from the local 'dockerstuff' directory;
        # build_args, if needed, must be a mapping of Docker --build-arg values
        dockercontainer = ecs.ContainerImage.from_asset(directory='dockerstuff')

        # Create vpc
        vpc = ec2.Vpc(self, 'MyVpc', max_azs=3)  # default is all AZs in region
        subnets = vpc.private_subnets

        # Create log groups for the scheduler and workers
        s_logs = logs.LogGroup(self, 'SlogGroup', log_group_name='SlogGroup')
        w_logs = logs.LogGroup(self, 'WlogGroup', log_group_name='WlogGroup')

        #Create private namespace
        #nspace = sd.PrivateDnsNamespace(self, 'MyNamespace', vpc=vpc, name='local-dask')

        # #Create role for ECS
        nRole = iam_.Role(self,
                          'ECSExecutionRole',
                          assumed_by=iam_.ServicePrincipal('ecs-tasks'))

        nPolicy = iam_.Policy(
            self,
            "ECSExecutionPolicy",
            policy_name="ECSExecutionPolicy",
            statements=[
                iam_.PolicyStatement(actions=[
                    'ecr:BatchCheckLayerAvailability',
                    'ecr:GetDownloadUrlForLayer', 'ecr:BatchGetImage',
                    'ecr:GetAuthorizationToken', 'logs:CreateLogStream',
                    'logs:PutLogEvents', 'sagemaker:*', 's3:*'
                ],
                                     resources=[
                                         '*',
                                     ]),
            ]).attach_to_role(nRole)

        # Create ECS cluster
        cluster = ecs.Cluster(self,
                              'DaskCluster',
                              vpc=vpc,
                              cluster_name='Fargate-Dask-Cluster')

        nspace = cluster.add_default_cloud_map_namespace(
            name='local-dask', type=sd.NamespaceType.DNS_PRIVATE, vpc=vpc)

        #TO DO: Use default namespace for cluster and use cmap options within fargate service
        #Update: done

        # schedulerRegistry = sd.Service(self,'serviceRegistryScheduler',
        #     namespace=nspace,dns_ttl=core.Duration.seconds(60),
        #     custom_health_check=sd.HealthCheckCustomConfig(failure_threshold=10),
        #     name='Dask-Scheduler')

        # # schedulerRegistry.register_ip_instance(id='serviceRegistryScheduler',ipv4='')

        # workerRegistry = sd.Service(self,'workerRegistryScheduler',
        #     namespace=nspace,dns_ttl=core.Duration.seconds(60),
        #     custom_health_check=sd.HealthCheckCustomConfig(failure_threshold=10),
        #     name='Dask-Worker')

        # -------------------- Add scheduler task ------------------------
        schedulerTask = ecs.TaskDefinition(
            self,
            'taskDefinitionScheduler',
            compatibility=ecs.Compatibility.FARGATE,
            cpu='4096',
            memory_mib='8192',
            network_mode=ecs.NetworkMode.AWS_VPC,
            placement_constraints=None,
            execution_role=nRole,
            family='Dask-Scheduler',
            task_role=nRole)

        schedulerTask.add_container('MySchedulerImage',
                                    image=dockercontainer,
                                    command=['dask-scheduler'],
                                    cpu=4096,
                                    essential=True,
                                    logging=ecs.LogDriver.aws_logs(
                                        stream_prefix='ecs', log_group=s_logs),
                                    memory_limit_mib=8192,
                                    memory_reservation_mib=8192)

        # -------------------- Add worker task -----------------------------
        workerTask = ecs.TaskDefinition(
            self,
            'taskDefinitionWorker',
            compatibility=ecs.Compatibility.FARGATE,
            cpu='4096',
            memory_mib='8192',
            network_mode=ecs.NetworkMode.AWS_VPC,
            placement_constraints=None,
            execution_role=nRole,
            family='Dask-Worker',
            task_role=nRole)

        workerTask.add_container(
            'MyWorkerImage',
            image=dockercontainer,
            command=[
                'dask-worker', 'dask-scheduler.local-dask:8786',
                '--memory-limit 1800MB', '--worker-port 9000',
                '--nanny-port 9001', '--bokeh-port 9002'
            ],
            cpu=4096,
            essential=True,
            logging=ecs.LogDriver.aws_logs(stream_prefix='ecs',
                                           log_group=s_logs),
            memory_limit_mib=8192,
            memory_reservation_mib=8192)

        # Task security group
        sg = ec2.SecurityGroup(self,
                               'MySG',
                               vpc=vpc,
                               description='Enable Scheduler ports access',
                               security_group_name='DaskSecurityGroup')

        # Ingress rule requires IPeer not Peer
        # TO DO: fix from any ipv4 to SG
        p1 = ec2.Peer().ipv4('0.0.0.0/0')
        p2 = ec2.Peer().ipv4('0.0.0.0/0')

        sg.add_ingress_rule(peer=p1,
                            connection=ec2.Port(protocol=ec2.Protocol.TCP,
                                                string_representation='p1',
                                                from_port=8786,
                                                to_port=8789))

        sg.add_ingress_rule(peer=p2,
                            connection=ec2.Port(protocol=ec2.Protocol.TCP,
                                                string_representation='p2',
                                                from_port=9000,
                                                to_port=9002))

        # ----------------- Add Scheduler Service -----------------------

        # deployconfig = ecs.CfnService.DeploymentConfigurationProperty(maximum_percent=200,minimum_healthy_percent=100)

        # vpcconfig = ecs.CfnService.AwsVpcConfigurationProperty(subnets = subnets,assign_public_ip=True,security_groups=[sg])

        # networkconfig = ecs.CfnService.NetworkConfigurationProperty(awsvpc_configuration=vpcconfig)

        # schedulerService = ecs.CfnService(self, 'DaskSchedulerService',
        #     task_definition = schedulerTask, deployment_configuration=deployconfig,
        #     cluster=cluster, desired_count=1, enable_ecs_managed_tags=None,
        #     launch_type='FARGATE',network_configuration=networkconfig,
        #     service_registries=schedulerRegistry)

        #ecs.CfnService.ServiceRegistryProperty()

        # Try fargate service? No service registry option available
        #using default cluster namespace
        cmap1 = ecs.CloudMapOptions(dns_ttl=core.Duration.seconds(60),
                                    failure_threshold=10,
                                    name='Dask-Scheduler')

        schedulerService = ecs.FargateService(
            self,
            'DaskSchedulerService',
            task_definition=schedulerTask,
            assign_public_ip=True,
            security_group=sg,
            #vpc_subnets=subnets,
            cluster=cluster,
            desired_count=1,
            max_healthy_percent=200,
            min_healthy_percent=100,
            service_name='Dask-Scheduler',
            cloud_map_options=cmap1)

        # schedulerService.enable_cloud_map(name = 'serviceRegistryScheduler')
        # schedulerRegistry.register_non_ip_instance(self,instance_id='DaskSchedulerService')

        # ----------------- Add Worker Service -----------------------
        #using default cluster namespace
        cmap2 = ecs.CloudMapOptions(dns_ttl=core.Duration.seconds(60),
                                    failure_threshold=10,
                                    name='Dask-Worker')

        workerService = ecs.FargateService(
            self,
            'DaskWorkerService',
            task_definition=workerTask,
            assign_public_ip=True,
            security_group=sg,
            #vpc_subnets=subnets,
            cluster=cluster,
            desired_count=1,
            max_healthy_percent=200,
            min_healthy_percent=100,
            service_name='Dask-Worker',
            cloud_map_options=cmap2)

        # workerService.enable_cloud_map(name = 'workerRegistryScheduler')

        #------------------------------------------------------------------------

        # ECS patterns offer much less control; this approach did not work

        # ecs_patterns.ApplicationLoadBalancedFargateService(self, "DaskFargateStack",
        #     cluster=cluster,            # Required
        #     cpu=512,                    # Default is 256
        #     desired_count=6,            # Default is 1
        #     task_image_options=ecs_patterns.ApplicationLoadBalancedTaskImageOptions(
        #         image=ecs.ContainerImage.from_registry(CONTAINER_IMAGE)),
        #     memory_limit_mib=2048,      # Default is 512
        #     public_load_balancer=True)  # Default is False

        # Start a notebook in the same vpc
        # print(type(sg.security_group_id))
        # print("------------------------------")
        # print(subnets[0].subnet_id)
        #Create role for Notebook instance
        smRole = iam_.Role(self,
                           "notebookAccessRole",
                           assumed_by=iam_.ServicePrincipal('sagemaker'))

        smPolicy = iam_.Policy(self,
                               "notebookAccessPolicy",
                               policy_name="notebookAccessPolicy",
                               statements=[
                                   iam_.PolicyStatement(
                                       actions=['s3:*', 'ecs:*'],
                                       resources=[
                                           '*',
                                       ]),
                               ]).attach_to_role(smRole)

        notebook = sagemaker_.CfnNotebookInstance(
            self,
            'DaskNotebook',
            instance_type='ml.t2.medium',
            volume_size_in_gb=50,
            security_group_ids=[sg.security_group_id],
            subnet_id=subnets[0].subnet_id,
            notebook_instance_name='DaskNotebook',
            role_arn=smRole.role_arn,
            root_access='Enabled',
            direct_internet_access='Enabled',
            default_code_repository=
            'https://github.com/w601sxs/dask-examples.git')
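With both Fargate services registered in the 'local-dask' Cloud Map namespace, code running on the notebook can reach the scheduler by its service-discovery DNS name; a minimal usage sketch, assuming the dask distributed client is installed in the notebook kernel:

from dask.distributed import Client

# DNS name combines the Cloud Map service name with the cluster's default namespace
client = Client("dask-scheduler.local-dask:8786")
print(client)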
Example #12
    def __init__(self, app: core.App, id: str) -> None:
        super().__init__(app, id)

        #Create mount instance
        efs = efs_.CfnFileSystem(self,
                                 "commonEFS4Notebooks",
                                 encrypted=False,
                                 performance_mode='generalPurpose',
                                 throughput_mode='bursting')

        print(efs.ref)
        #Create mount target
        mount = efs_.CfnMountTarget(
            self,
            "MountID",
            file_system_id=efs.ref,
            security_groups=default_sg,
            subnet_id=default_subnet,
        )

        #Create role for Notebook instance
        nRole = iam_.Role(self,
                          "notebookAccessRole",
                          assumed_by=iam_.ServicePrincipal('sagemaker'))

        nPolicy = iam_.Policy(self,
                              "notebookAccessPolicy",
                              policy_name="notebookAccessPolicy",
                              statements=[
                                  iam_.PolicyStatement(actions=[
                                      's3:*',
                                  ],
                                                       resources=[
                                                           '*',
                                                       ]),
                              ]).attach_to_role(nRole)

        #Create notebook instances cluster
        instances = []
        print(mount.attr_ip_address)
        encodedScript = LifecycleScriptStr.format(efs.ref)

        print("Adding following script to the lifecycle config..\n___\n\n" +
              encodedScript)

        code = [{"content": core.Fn.base64(encodedScript)}]

        lifecycleconfig = sagemaker_.CfnNotebookInstanceLifecycleConfig(
            self,
            LifeCycleConfigName,
            notebook_instance_lifecycle_config_name=LifeCycleConfigName,
            on_create=None,
            on_start=code)

        for i in range(num_instances):
            nid = 'CDK-Notebook-Instance-User-' + str(i)
            instances.append(
                sagemaker_.CfnNotebookInstance(
                    self,
                    nid,
                    instance_type='ml.t2.medium',
                    volume_size_in_gb=5,
                    security_group_ids=default_sg,
                    subnet_id=default_subnet,
                    notebook_instance_name=nid,
                    role_arn=nRole.role_arn,
                    lifecycle_config_name=LifeCycleConfigName,
                ))
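The example depends on module-level settings (LifeCycleConfigName, LifecycleScriptStr, num_instances, default_sg, default_subnet) defined outside the snippet; a hedged sketch of their likely shape (all values are illustrative placeholders, and the mount command only indicates how the EFS id from efs.ref might be used):

LifeCycleConfigName = 'EFSMountConfig'
num_instances = 3
default_sg = ['sg-0123456789abcdef0']
default_subnet = 'subnet-0123456789abcdef0'

# {0} is replaced with the EFS file system id (efs.ref) before base64 encoding
LifecycleScriptStr = '''#!/bin/bash
set -e
mkdir -p /home/ec2-user/SageMaker/efs
sudo mount -t nfs4 -o nfsvers=4.1 {0}.efs.us-east-1.amazonaws.com:/ /home/ec2-user/SageMaker/efs
'''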
Example #13
    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        SAGEMAKER_NOTEBOOK_INSTANCE_TYPE = cdk.CfnParameter(
            self,
            'SageMakerNotebookInstanceType',
            type='String',
            description='Amazon SageMaker Notebook instance type',
            default='ml.t2.medium')

        #XXX: To create the stack in an existing VPC,
        # uncomment the code below, comment out the vpc = aws_ec2.Vpc(..) code,
        # and pass -c vpc_name=your-existing-vpc to the cdk command,
        # for example:
        # cdk -c vpc_name=your-existing-vpc synth
        #
        vpc_name = self.node.try_get_context('vpc_name')
        vpc = aws_ec2.Vpc.from_lookup(self,
                                      'ExistingVPC',
                                      is_default=True,
                                      vpc_name=vpc_name)

        #XXX: To use more than 2 AZs, be sure to specify the account and region on your stack.
        #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/Vpc.html
        # vpc = aws_ec2.Vpc(self, 'SageMakerStudioVPC',
        #   max_azs=2,
        #   gateway_endpoints={
        #     "S3": aws_ec2.GatewayVpcEndpointOptions(
        #       service=aws_ec2.GatewayVpcEndpointAwsService.S3
        #     )
        #   }
        # )

        sg_sagemaker_notebook_instance = aws_ec2.SecurityGroup(
            self,
            "SageMakerNotebookSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='Security group with no ingress rule',
            security_group_name='sagemaker-nb-{}-sg'.format(''.join(
                random.sample((string.ascii_letters), k=5))))
        sg_sagemaker_notebook_instance.add_ingress_rule(
            peer=sg_sagemaker_notebook_instance,
            connection=aws_ec2.Port.all_traffic(),
            description='sagemaker notebook security group')
        cdk.Tags.of(sg_sagemaker_notebook_instance).add(
            'Name', 'sagemaker-nb-sg')

        sagemaker_notebook_role_policy_doc = aws_iam.PolicyDocument()
        sagemaker_notebook_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    "resources": ["arn:aws:s3:::*"],
                    "actions": [
                        "s3:GetObject", "s3:PutObject", "s3:DeleteObject",
                        "s3:ListBucket"
                    ]
                }))

        sagemaker_notebook_role = aws_iam.Role(
            self,
            'SageMakerNotebookRole',
            role_name='SageMakerNotebookRole-{suffix}'.format(
                suffix=''.join(random.sample((string.ascii_letters), k=5))),
            assumed_by=aws_iam.ServicePrincipal('sagemaker.amazonaws.com'),
            inline_policies={
                'sagemaker-custome-execution-role':
                sagemaker_notebook_role_policy_doc
            },
            managed_policies=[
                aws_iam.ManagedPolicy.from_aws_managed_policy_name(
                    'AmazonSageMakerFullAccess'),
                aws_iam.ManagedPolicy.from_aws_managed_policy_name(
                    'AWSCloudFormationReadOnlyAccess')
            ])

        #XXX: skip downloading rds-combined-ca-bundle.pem if not use SSL with a MySQL DB instance
        # https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/CHAP_MySQL.html#MySQL.Concepts.SSLSupport
        sagemaker_nb_lifecycle_content = '''#!/bin/bash
sudo -u ec2-user -i <<'EOF'

echo "export AWS_REGION={AWS_Region}" >> ~/.bashrc
curl -LO https://bitbucket.org/eunjeon/mecab-ko/downloads/mecab-0.996-ko-0.9.2.tar.gz
tar zxfv mecab-0.996-ko-0.9.2.tar.gz
pushd mecab-0.996-ko-0.9.2
./configure
make
make check
sudo make install
sudo ldconfig
mecab -v
mecab-config --version
popd

curl -LO https://bitbucket.org/eunjeon/mecab-ko-dic/downloads/mecab-ko-dic-2.1.1-20180720.tar.gz
tar -zxvf mecab-ko-dic-2.1.1-20180720.tar.gz
pushd mecab-ko-dic-2.1.1-20180720
./autogen.sh
./configure
make
sudo make install
popd

for each in python3 pytorch_latest_p36
do
    source /home/ec2-user/anaconda3/bin/activate ${{each}}
    pip install --upgrade pretty_errors
    pip install --upgrade pandas-profiling[notebook]
    pip install --upgrade ipython-sql
    pip install --upgrade PyMySQL
    pip install torchvision
    pip install torchtext
    pip install spacy
    pip install nltk
    pip install requests
    pip install mecab-python
    pip install konlpy
    pip install jpype1-py3
    conda deactivate
done
EOF
'''.format(AWS_Region=cdk.Aws.REGION)

        sagemaker_lifecycle_config_prop = aws_sagemaker.CfnNotebookInstanceLifecycleConfig.NotebookInstanceLifecycleHookProperty(
            content=cdk.Fn.base64(sagemaker_nb_lifecycle_content))

        sagemaker_lifecycle_config = aws_sagemaker.CfnNotebookInstanceLifecycleConfig(
            self,
            'SageMakerNotebookLifeCycleConfig',
            notebook_instance_lifecycle_config_name=
            'SageMakerNotebookLifeCycleConfig',
            on_start=[sagemaker_lifecycle_config_prop])

        sagemaker_notebook_instance = aws_sagemaker.CfnNotebookInstance(
            self,
            'SageMakerNotebookInstance',
            instance_type=SAGEMAKER_NOTEBOOK_INSTANCE_TYPE.value_as_string,
            role_arn=sagemaker_notebook_role.role_arn,
            lifecycle_config_name=sagemaker_lifecycle_config.
            notebook_instance_lifecycle_config_name,
            notebook_instance_name='MySageMakerWorkbook',
            root_access='Disabled',
            security_group_ids=[
                sg_sagemaker_notebook_instance.security_group_id
            ],
            subnet_id=vpc.select_subnets(
                subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT).subnet_ids[0])
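Because the stack uses aws_ec2.Vpc.from_lookup, it has to be synthesized with an explicit account and region; a sketch of how such a stack is typically instantiated (the stack class name is hypothetical, and the environment values come from the standard CDK environment variables, assuming CDK v2-style imports):

import os

import aws_cdk as cdk

app = cdk.App()
# hypothetical class name wrapping the __init__ shown above
SageMakerNotebookStack(app, "sagemaker-notebook",
                       env=cdk.Environment(account=os.environ["CDK_DEFAULT_ACCOUNT"],
                                           region=os.environ["CDK_DEFAULT_REGION"]))
app.synth()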
Example #14
    def __init__(self, scope: core.Construct, id: str, *, prefix: str,
                 environment: str, configuration, **kwargs):
        """
        :param scope: Stack class, used by CDK.
        :param id: ID of the construct, used by CDK.
        :param prefix: Prefix of the construct, used for naming purposes.
        :param environment: Environment of the construct, used for naming purposes.
        :param configuration: Configuration of the construct. In this case SAGEMAKER_NOTEBOOK.
        :param kwargs: Other parameters that could be used by the construct.
        """
        super().__init__(scope, id, **kwargs)
        self.prefix = prefix
        self.environment_ = environment
        self._configuration = configuration

        # Validating that the payload passed is correct
        validate_configuration(configuration_schema=SAGEMAKER_NOTEBOOK_SCHEMA,
                               configuration_received=configuration)

        base_name = self._configuration["name"]
        on_create_list = list()
        if validate_file(
                file_path=self._configuration["scripts"]["on_create"]):
            on_create_file = self._configuration["scripts"]["on_create"]
        else:
            on_create_file = file_path + "/scripts/iot_analytics_notebook/on_create.sh"
        with open(on_create_file) as on_create:
            on_create_contents = {"content": core.Fn.base64(on_create.read())}
            on_create_list.append(on_create_contents)

        on_start_list = list()
        if validate_file(
                file_path=self._configuration["scripts"]["on_create"]):
            on_start_file = self._configuration["scripts"]["on_start"]
        else:
            on_start_file = file_path + "/scripts/iot_analytics_notebook/on_start.sh"
        with open(on_start_file) as on_start:
            on_start_contents = {"content": core.Fn.base64(on_start.read())}
            on_start_list.append(on_start_contents)

        lifecycle_configuration_name = self.prefix + "-" + base_name + "lifecycle-" + self.environment_
        self._lifecycle_configuration = sagemaker.CfnNotebookInstanceLifecycleConfig(
            self,
            id=lifecycle_configuration_name,
            notebook_instance_lifecycle_config_name=
            lifecycle_configuration_name,
            on_create=on_create_list,
            on_start=on_start_list,
        )

        role_name = self.prefix + "_" + base_name + "sagemaker_role_" + self.environment_
        self._role = iam.Role(
            self,
            id=role_name,
            role_name=role_name,
            assumed_by=iam.ServicePrincipal(service="sagemaker.amazonaws.com"))

        managed_policy = iam.ManagedPolicy.from_aws_managed_policy_name(
            managed_policy_name="AmazonSageMakerFullAccess")
        self._role.add_managed_policy(policy=managed_policy)

        policy_name = self.prefix + "_" + base_name + "sagemaker_policy" + self.environment_
        ecr_statement = iam.PolicyStatement(
            actions=SAGEMAKER_POLICY["ecr_actions"],
            resources=SAGEMAKER_POLICY["ecr_resources"])
        s3_statement = iam.PolicyStatement(
            actions=SAGEMAKER_POLICY["s3_actions"],
            resources=SAGEMAKER_POLICY["s3_resources"])
        policy = iam.Policy(self,
                            id=policy_name,
                            policy_name=policy_name,
                            statements=[ecr_statement, s3_statement])
        self._role.attach_inline_policy(policy=policy)

        sagemaker_notebook_name = self.prefix + "_" + base_name + "sagemaker_notebook_" + self.environment_
        self._sagemaker_notebook = sagemaker.CfnNotebookInstance(
            self,
            id=sagemaker_notebook_name,
            notebook_instance_name=sagemaker_notebook_name,
            lifecycle_config_name=self._lifecycle_configuration.
            notebook_instance_lifecycle_config_name,
            role_arn=self._role.role_arn,
            instance_type=self._configuration["instance_type"],
        )
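The construct validates the configuration payload against SAGEMAKER_NOTEBOOK_SCHEMA before using it; a hedged sketch of a payload that would satisfy the keys referenced above (every value is a placeholder):

configuration = {
    "name": "analytics_",
    "instance_type": "ml.t2.medium",
    "scripts": {
        "on_create": "scripts/iot_analytics_notebook/on_create.sh",
        "on_start": "scripts/iot_analytics_notebook/on_start.sh",
    },
}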
Example #15
    def __init__(self, scope: core.Construct, construct_id: str,
                 **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        # The code that defines your stack goes here
        vpc_name = self.node.try_get_context("vpc_name")
        vpc = aws_ec2.Vpc.from_lookup(self,
                                      "ExistingVPC",
                                      is_default=True,
                                      vpc_name=vpc_name)

        sg_use_mysql = aws_ec2.SecurityGroup(
            self,
            'MySQLClientSG',
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for mysql client',
            security_group_name='use-mysql-sg')
        core.Tags.of(sg_use_mysql).add('Name', 'mysql-client-sg')

        sg_mysql_server = aws_ec2.SecurityGroup(
            self,
            'MySQLServerSG',
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for mysql',
            security_group_name='mysql-server-sg')
        sg_mysql_server.add_ingress_rule(peer=sg_use_mysql,
                                         connection=aws_ec2.Port.tcp(3306),
                                         description='use-mysql-sg')
        core.Tags.of(sg_mysql_server).add('Name', 'mysql-server-sg')

        rds_subnet_group = aws_rds.SubnetGroup(
            self,
            'RdsSubnetGroup',
            description='subnet group for mysql',
            subnet_group_name='aurora-mysql',
            vpc_subnets=aws_ec2.SubnetSelection(
                subnet_type=aws_ec2.SubnetType.PRIVATE),
            vpc=vpc)

        rds_engine = aws_rds.DatabaseClusterEngine.aurora_mysql(
            version=aws_rds.AuroraMysqlEngineVersion.VER_2_08_1)

        rds_cluster_param_group = aws_rds.ParameterGroup(
            self,
            'AuroraMySQLClusterParamGroup',
            engine=rds_engine,
            description='Custom cluster parameter group for aurora-mysql5.7',
            parameters={
                'innodb_flush_log_at_trx_commit': '2',
                'slow_query_log': '1',
                'tx_isolation': 'READ-COMMITTED',
                'wait_timeout': '300',
                'character-set-client-handshake': '0',
                'character_set_server': 'utf8mb4',
                'collation_server': 'utf8mb4_unicode_ci',
                'init_connect': 'SET NAMES utf8mb4 COLLATE utf8mb4_unicode_ci'
            })

        rds_db_param_group = aws_rds.ParameterGroup(
            self,
            'AuroraMySQLDBParamGroup',
            engine=rds_engine,
            description='Custom parameter group for aurora-mysql5.7',
            parameters={
                'slow_query_log': '1',
                'tx_isolation': 'READ-COMMITTED',
                'wait_timeout': '300',
                'init_connect': 'SET NAMES utf8mb4 COLLATE utf8mb4_unicode_ci'
            })

        db_cluster_name = self.node.try_get_context('db_cluster_name')
        #    #XXX: aws_rds.Credentials.from_username(username, ...) can not be given user specific Secret name
        #    #XXX: therefore, first create Secret and then use it to create database
        #    db_secret_name = self.node.try_get_context('db_secret_name')
        #    #XXX: arn:{partition}:{service}:{region}:{account}:{resource}{sep}}{resource-name}
        #    db_secret_arn = 'arn:aws:secretsmanager:{region}:{account}:secret:{resource_name}'.format(
        #      region=core.Aws.REGION, account=core.Aws.ACCOUNT_ID, resource_name=db_secret_name)
        #    db_secret = aws_secretsmanager.Secret.from_secret_arn(self, 'DBSecretFromArn', db_secret_arn)
        #    rds_credentials = aws_rds.Credentials.from_secret(db_secret)
        rds_credentials = aws_rds.Credentials.from_generated_secret("admin")
        db_cluster = aws_rds.DatabaseCluster(
            self,
            'Database',
            engine=rds_engine,
            credentials=rds_credentials,
            instance_props={
                'instance_type':
                aws_ec2.InstanceType.of(aws_ec2.InstanceClass.BURSTABLE3,
                                        aws_ec2.InstanceSize.MEDIUM),
                'parameter_group':
                rds_db_param_group,
                'vpc_subnets': {
                    'subnet_type': aws_ec2.SubnetType.PRIVATE
                },
                'vpc':
                vpc,
                'auto_minor_version_upgrade':
                False,
                'security_groups': [sg_mysql_server]
            },
            instances=2,
            parameter_group=rds_cluster_param_group,
            cloudwatch_logs_retention=aws_logs.RetentionDays.THREE_DAYS,
            cluster_identifier=db_cluster_name,
            subnet_group=rds_subnet_group,
            backup=aws_rds.BackupProps(retention=core.Duration.days(3),
                                       preferred_window="03:00-04:00"))

        sagemaker_notebook_role_policy_doc = aws_iam.PolicyDocument()
        sagemaker_notebook_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect": aws_iam.Effect.ALLOW,
                    "resources": [db_cluster.secret.secret_full_arn],
                    "actions": ["secretsmanager:GetSecretValue"]
                }))

        sagemaker_notebook_role = aws_iam.Role(
            self,
            'SageMakerNotebookRoleForRDS',
            role_name='AWSSageMakerNotebookRoleForRDS',
            assumed_by=aws_iam.ServicePrincipal('sagemaker.amazonaws.com'),
            inline_policies={
                'AuroraMySQLSecretPolicy': sagemaker_notebook_role_policy_doc
            })

        cf_readonly_access_policy = aws_iam.ManagedPolicy.from_aws_managed_policy_name(
            'AWSCloudFormationReadOnlyAccess')
        sagemaker_notebook_role.add_managed_policy(cf_readonly_access_policy)

        #XXX: skip downloading rds-combined-ca-bundle.pem if not use SSL with a MySQL DB instance
        # https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/CHAP_MySQL.html#MySQL.Concepts.SSLSupport
        rds_wb_lifecycle_content = '''#!/bin/bash
sudo -u ec2-user -i <<'EOF'
echo "export AWS_REGION={AWS_Region}" >> ~/.bashrc
source /home/ec2-user/anaconda3/bin/activate python3
pip install --upgrade ipython-sql
pip install --upgrade PyMySQL 
pip install --upgrade pretty_errors
source /home/ec2-user/anaconda3/bin/deactivate
cd /home/ec2-user/SageMaker
wget -N https://s3.amazonaws.com/rds-downloads/rds-combined-ca-bundle.pem
wget -N https://raw.githubusercontent.com/ksmin23/my-aws-cdk-examples/main/rds/sagemaker-aurora_mysql/ipython-sql.ipynb
EOF
'''.format(AWS_Region=core.Aws.REGION)

        rds_wb_lifecycle_config_prop = aws_sagemaker.CfnNotebookInstanceLifecycleConfig.NotebookInstanceLifecycleHookProperty(
            content=core.Fn.base64(rds_wb_lifecycle_content))

        rds_wb_lifecycle_config = aws_sagemaker.CfnNotebookInstanceLifecycleConfig(
            self,
            'MySQLWorkbenchLifeCycleConfig',
            notebook_instance_lifecycle_config_name=
            'MySQLWorkbenchLifeCycleConfig',
            on_start=[rds_wb_lifecycle_config_prop])

        rds_workbench = aws_sagemaker.CfnNotebookInstance(
            self,
            'AuroraMySQLWorkbench',
            instance_type='ml.t3.xlarge',
            role_arn=sagemaker_notebook_role.role_arn,
            lifecycle_config_name=rds_wb_lifecycle_config.
            notebook_instance_lifecycle_config_name,
            notebook_instance_name='AuroraMySQLWorkbench',
            root_access='Disabled',
            security_group_ids=[sg_use_mysql.security_group_id],
            subnet_id=vpc.select_subnets(
                subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids[0])

        core.CfnOutput(self,
                       'StackName',
                       value=self.stack_name,
                       export_name='StackName')
        core.CfnOutput(self, 'VpcId', value=vpc.vpc_id, export_name='VpcId')

        core.CfnOutput(self,
                       'DBClusterName',
                       value=db_cluster.cluster_identifier,
                       export_name='DBClusterName')
        core.CfnOutput(self,
                       'DBCluster',
                       value=db_cluster.cluster_endpoint.socket_address,
                       export_name='DBCluster')
        #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_secretsmanager/README.html
        # secret_arn="arn:aws:secretsmanager:<region>:<account-id-number>:secret:<secret-name>-<random-6-characters>",
        core.CfnOutput(self,
                       'DBSecret',
                       value=db_cluster.secret.secret_name,
                       export_name='DBSecret')

        core.CfnOutput(self,
                       'SageMakerRole',
                       value=sagemaker_notebook_role.role_name,
                       export_name='SageMakerRole')
        core.CfnOutput(self,
                       'SageMakerNotebookInstance',
                       value=rds_workbench.notebook_instance_name,
                       export_name='SageMakerNotebookInstance')
        core.CfnOutput(self,
                       'SageMakerNotebookInstanceLifecycleConfig',
                       value=rds_workbench.lifecycle_config_name,
                       export_name='SageMakerNotebookInstanceLifecycleConfig')
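
A minimal usage sketch (not part of the stack above; the secret name is a placeholder): from inside the notebook, the role's secretsmanager:GetSecretValue permission lets you read the generated Aurora credentials and connect with the PyMySQL package installed by the on-start lifecycle script.

import json
import os

import boto3
import pymysql  # installed by the on-start lifecycle script

# Placeholder: use the value printed by the DBSecret stack output.
SECRET_NAME = "<DBSecret output value>"

# AWS_REGION is exported to ~/.bashrc by the lifecycle script.
secrets = boto3.client("secretsmanager", region_name=os.environ.get("AWS_REGION"))
secret = json.loads(
    secrets.get_secret_value(SecretId=SECRET_NAME)["SecretString"])

# The generated secret stores host, port, username and password.
conn = pymysql.connect(
    host=secret["host"],
    port=int(secret.get("port", 3306)),
    user=secret["username"],
    password=secret["password"],
    ssl={"ca": "/home/ec2-user/SageMaker/rds-combined-ca-bundle.pem"})

with conn.cursor() as cur:
    cur.execute("SELECT VERSION()")
    print(cur.fetchone())
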
Example #16
  def __init__(self, scope: Construct, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    # The code that defines your stack goes here
    vpc = aws_ec2.Vpc(self, "NeptuneHolVPC",
      max_azs=2,
      gateway_endpoints={
        "S3": aws_ec2.GatewayVpcEndpointOptions(
          service=aws_ec2.GatewayVpcEndpointAwsService.S3
        )
      }
    )

    sg_use_graph_db = aws_ec2.SecurityGroup(self, "NeptuneClientSG",
      vpc=vpc,
      allow_all_outbound=True,
      description='security group for neptune client',
      security_group_name='use-neptune-client'
    )
    cdk.Tags.of(sg_use_graph_db).add('Name', 'use-neptune-client')

    sg_graph_db = aws_ec2.SecurityGroup(self, "NeptuneSG",
      vpc=vpc,
      allow_all_outbound=True,
      description='security group for neptune',
      security_group_name='neptune-server'
    )
    cdk.Tags.of(sg_graph_db).add('Name', 'neptune-server')

    sg_graph_db.add_ingress_rule(peer=sg_graph_db, connection=aws_ec2.Port.tcp(8182), description='neptune-server')
    sg_graph_db.add_ingress_rule(peer=sg_use_graph_db, connection=aws_ec2.Port.tcp(8182), description='use-neptune-client')

    graph_db_subnet_group = aws_neptune.CfnDBSubnetGroup(self, 'NeptuneHolSubnetGroup',
      db_subnet_group_description='subnet group for neptune hol',
      subnet_ids=vpc.select_subnets(subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT).subnet_ids,
      db_subnet_group_name='neptune-hol'
    )

    graph_db = aws_neptune.CfnDBCluster(self, 'NeptuneHol',
      availability_zones=vpc.availability_zones,
      db_subnet_group_name=graph_db_subnet_group.db_subnet_group_name,
      db_cluster_identifier='neptune-hol',
      backup_retention_period=1,
      preferred_backup_window='08:45-09:15',
      preferred_maintenance_window='sun:18:00-sun:18:30',
      vpc_security_group_ids=[sg_graph_db.security_group_id]
    )
    graph_db.add_depends_on(graph_db_subnet_group)

    graph_db_instance = aws_neptune.CfnDBInstance(self, 'NeptuneHolInstance',
      db_instance_class='db.r5.large',
      allow_major_version_upgrade=False,
      auto_minor_version_upgrade=False,
      availability_zone=vpc.availability_zones[0],
      db_cluster_identifier=graph_db.db_cluster_identifier,
      db_instance_identifier='neptune-hol',
      preferred_maintenance_window='sun:18:00-sun:18:30'
    )
    graph_db_instance.add_depends_on(graph_db)

    graph_db_replica_instance = aws_neptune.CfnDBInstance(self, 'NeptuneHolReplicaInstance',
      db_instance_class='db.r5.large',
      allow_major_version_upgrade=False,
      auto_minor_version_upgrade=False,
      availability_zone=vpc.availability_zones[-1],
      db_cluster_identifier=graph_db.db_cluster_identifier,
      db_instance_identifier='neptune-hol-replica',
      preferred_maintenance_window='sun:18:00-sun:18:30'
    )
    graph_db_replica_instance.add_depends_on(graph_db)
    graph_db_replica_instance.add_depends_on(graph_db_instance)

    sagemaker_notebook_role_policy_doc = aws_iam.PolicyDocument()
    sagemaker_notebook_role_policy_doc.add_statements(aws_iam.PolicyStatement(**{
      "effect": aws_iam.Effect.ALLOW,
      "resources": ["arn:aws:s3:::aws-neptune-notebook",
        "arn:aws:s3:::aws-neptune-notebook/*"],
      "actions": ["s3:GetObject",
        "s3:ListBucket"]
    }))

    sagemaker_notebook_role_policy_doc.add_statements(aws_iam.PolicyStatement(**{
      "effect": aws_iam.Effect.ALLOW,
      "resources": ["arn:aws:neptune-db:{region}:{account}:{cluster_id}/*".format(
        region=cdk.Aws.REGION, account=cdk.Aws.ACCOUNT_ID, cluster_id=graph_db.attr_cluster_resource_id)],
      "actions": ["neptune-db:connect"]
    }))

    sagemaker_notebook_role = aws_iam.Role(self, 'SageMakerNotebookForNeptuneWorkbenchRole',
      role_name='AWSNeptuneNotebookRole-NeptuneHol',
      assumed_by=aws_iam.ServicePrincipal('sagemaker.amazonaws.com'),
      #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
      inline_policies={
        'AWSNeptuneNotebook': sagemaker_notebook_role_policy_doc
      }
    )

    neptune_wb_lifecycle_content = '''#!/bin/bash
sudo -u ec2-user -i <<'EOF'

echo "export GRAPH_NOTEBOOK_AUTH_MODE=DEFAULT" >> ~/.bashrc
echo "export GRAPH_NOTEBOOK_HOST={NeptuneClusterEndpoint}" >> ~/.bashrc
echo "export GRAPH_NOTEBOOK_PORT={NeptuneClusterPort}" >> ~/.bashrc
echo "export NEPTUNE_LOAD_FROM_S3_ROLE_ARN=''" >> ~/.bashrc
echo "export AWS_REGION={AWS_Region}" >> ~/.bashrc

aws s3 cp s3://aws-neptune-notebook/graph_notebook.tar.gz /tmp/graph_notebook.tar.gz
rm -rf /tmp/graph_notebook
tar -zxvf /tmp/graph_notebook.tar.gz -C /tmp
/tmp/graph_notebook/install.sh
EOF
'''.format(NeptuneClusterEndpoint=graph_db.attr_endpoint,
    NeptuneClusterPort=graph_db.attr_port,
    AWS_Region=cdk.Aws.REGION)

    neptune_wb_lifecycle_config_prop = aws_sagemaker.CfnNotebookInstanceLifecycleConfig.NotebookInstanceLifecycleHookProperty(
      content=cdk.Fn.base64(neptune_wb_lifecycle_content)
    )

    neptune_wb_lifecycle_config = aws_sagemaker.CfnNotebookInstanceLifecycleConfig(self, 'NeptuneWorkbenchLifeCycleConfig',
      notebook_instance_lifecycle_config_name='NeptuneWorkbenchLifeCycleConfig',
      on_start=[neptune_wb_lifecycle_config_prop]
    )

    neptune_workbench = aws_sagemaker.CfnNotebookInstance(self, 'NeptuneWorkbench',
      instance_type='ml.t2.medium',
      role_arn=sagemaker_notebook_role.role_arn,
      lifecycle_config_name=neptune_wb_lifecycle_config.notebook_instance_lifecycle_config_name,
      notebook_instance_name='NeptuneHolWorkbench',
      root_access='Disabled',
      security_group_ids=[sg_use_graph_db.security_group_id],
      subnet_id=graph_db_subnet_group.subnet_ids[0]
    )
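
A minimal sketch (not part of the stack above): opening a Gremlin connection to the Neptune cluster from the workbench, reusing the environment variables that the on-start lifecycle script appends to ~/.bashrc. It assumes the gremlinpython package is available in the notebook kernel.

import os

from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection
from gremlin_python.process.anonymous_traversal import traversal

# Exported by the lifecycle configuration above.
endpoint = os.environ["GRAPH_NOTEBOOK_HOST"]
port = os.environ["GRAPH_NOTEBOOK_PORT"]

connection = DriverRemoteConnection("wss://{}:{}/gremlin".format(endpoint, port), "g")
g = traversal().withRemote(connection)
print(g.V().limit(1).toList())  # smoke test: fetch a single vertex
connection.close()
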
Example #17
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        vpc = aws_ec2.Vpc(
            self,
            "OctemberVPC",
            max_azs=2,
            #      subnet_configuration=[{
            #          "cidrMask": 24,
            #          "name": "Public",
            #          "subnetType": aws_ec2.SubnetType.PUBLIC,
            #        },
            #        {
            #          "cidrMask": 24,
            #          "name": "Private",
            #          "subnetType": aws_ec2.SubnetType.PRIVATE
            #        },
            #        {
            #          "cidrMask": 28,
            #          "name": "Isolated",
            #          "subnetType": aws_ec2.SubnetType.ISOLATED,
            #          "reserved": True
            #        }
            #      ],
            gateway_endpoints={
                "S3":
                aws_ec2.GatewayVpcEndpointOptions(
                    service=aws_ec2.GatewayVpcEndpointAwsService.S3)
            })

        dynamo_db_endpoint = vpc.add_gateway_endpoint(
            "DynamoDbEndpoint",
            service=aws_ec2.GatewayVpcEndpointAwsService.DYNAMODB)

        s3_bucket = s3.Bucket(
            self,
            "s3bucket",
            bucket_name="octember-bizcard-{region}-{account}".format(
                region=core.Aws.REGION, account=core.Aws.ACCOUNT_ID))

        api = apigw.RestApi(
            self,
            "BizcardImageUploader",
            rest_api_name="BizcardImageUploader",
            description="This service serves uploading bizcard images into s3.",
            endpoint_types=[apigw.EndpointType.REGIONAL],
            binary_media_types=["image/png", "image/jpg"],
            deploy=True,
            deploy_options=apigw.StageOptions(stage_name="v1"))

        rest_api_role = aws_iam.Role(
            self,
            "ApiGatewayRoleForS3",
            role_name="ApiGatewayRoleForS3FullAccess",
            assumed_by=aws_iam.ServicePrincipal("apigateway.amazonaws.com"),
            managed_policies=[
                aws_iam.ManagedPolicy.from_aws_managed_policy_name(
                    "AmazonS3FullAccess")
            ])

        list_objects_responses = [
            apigw.IntegrationResponse(
                status_code="200",
                #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_apigateway/IntegrationResponse.html#aws_cdk.aws_apigateway.IntegrationResponse.response_parameters
                # The response parameters from the backend response that API Gateway sends to the method response.
                # Use the destination as the key and the source as the value:
                #  - The destination must be an existing response parameter in the MethodResponse property.
                #  - The source must be an existing method request parameter or a static value.
                response_parameters={
                    'method.response.header.Timestamp':
                    'integration.response.header.Date',
                    'method.response.header.Content-Length':
                    'integration.response.header.Content-Length',
                    'method.response.header.Content-Type':
                    'integration.response.header.Content-Type'
                }),
            apigw.IntegrationResponse(status_code="400",
                                      selection_pattern="4\d{2}"),
            apigw.IntegrationResponse(status_code="500",
                                      selection_pattern="5\d{2}")
        ]

        list_objects_integration_options = apigw.IntegrationOptions(
            credentials_role=rest_api_role,
            integration_responses=list_objects_responses)

        get_s3_integration = apigw.AwsIntegration(
            service="s3",
            integration_http_method="GET",
            path='/',
            options=list_objects_integration_options)

        api.root.add_method(
            "GET",
            get_s3_integration,
            authorization_type=apigw.AuthorizationType.IAM,
            api_key_required=False,
            method_responses=[
                apigw.MethodResponse(
                    status_code="200",
                    response_parameters={
                        'method.response.header.Timestamp': False,
                        'method.response.header.Content-Length': False,
                        'method.response.header.Content-Type': False
                    },
                    response_models={'application/json': apigw.EmptyModel()}),
                apigw.MethodResponse(status_code="400"),
                apigw.MethodResponse(status_code="500")
            ],
            request_parameters={'method.request.header.Content-Type': False})

        get_s3_folder_integration_options = apigw.IntegrationOptions(
            credentials_role=rest_api_role,
            integration_responses=list_objects_responses,
            #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_apigateway/IntegrationOptions.html#aws_cdk.aws_apigateway.IntegrationOptions.request_parameters
            # Specify request parameters as key-value pairs (string-to-string mappings), with a destination as the key and a source as the value.
            # The source must be an existing method request parameter or a static value.
            request_parameters={
                "integration.request.path.bucket": "method.request.path.folder"
            })

        get_s3_folder_integration = apigw.AwsIntegration(
            service="s3",
            integration_http_method="GET",
            path="{bucket}",
            options=get_s3_folder_integration_options)

        s3_folder = api.root.add_resource('{folder}')
        s3_folder.add_method(
            "GET",
            get_s3_folder_integration,
            authorization_type=apigw.AuthorizationType.IAM,
            api_key_required=False,
            method_responses=[
                apigw.MethodResponse(
                    status_code="200",
                    response_parameters={
                        'method.response.header.Timestamp': False,
                        'method.response.header.Content-Length': False,
                        'method.response.header.Content-Type': False
                    },
                    response_models={'application/json': apigw.EmptyModel()}),
                apigw.MethodResponse(status_code="400"),
                apigw.MethodResponse(status_code="500")
            ],
            request_parameters={
                'method.request.header.Content-Type': False,
                'method.request.path.folder': True
            })

        get_s3_item_integration_options = apigw.IntegrationOptions(
            credentials_role=rest_api_role,
            integration_responses=list_objects_responses,
            request_parameters={
                "integration.request.path.bucket":
                "method.request.path.folder",
                "integration.request.path.object": "method.request.path.item"
            })

        get_s3_item_integration = apigw.AwsIntegration(
            service="s3",
            integration_http_method="GET",
            path="{bucket}/{object}",
            options=get_s3_item_integration_options)

        s3_item = s3_folder.add_resource('{item}')
        s3_item.add_method(
            "GET",
            get_s3_item_integration,
            authorization_type=apigw.AuthorizationType.IAM,
            api_key_required=False,
            method_responses=[
                apigw.MethodResponse(
                    status_code="200",
                    response_parameters={
                        'method.response.header.Timestamp': False,
                        'method.response.header.Content-Length': False,
                        'method.response.header.Content-Type': False
                    },
                    response_models={'application/json': apigw.EmptyModel()}),
                apigw.MethodResponse(status_code="400"),
                apigw.MethodResponse(status_code="500")
            ],
            request_parameters={
                'method.request.header.Content-Type': False,
                'method.request.path.folder': True,
                'method.request.path.item': True
            })

        put_s3_item_integration_options = apigw.IntegrationOptions(
            credentials_role=rest_api_role,
            integration_responses=[
                apigw.IntegrationResponse(status_code="200"),
                apigw.IntegrationResponse(status_code="400",
                                          selection_pattern="4\d{2}"),
                apigw.IntegrationResponse(status_code="500",
                                          selection_pattern="5\d{2}")
            ],
            request_parameters={
                "integration.request.header.Content-Type":
                "method.request.header.Content-Type",
                "integration.request.path.bucket":
                "method.request.path.folder",
                "integration.request.path.object": "method.request.path.item"
            })

        put_s3_item_integration = apigw.AwsIntegration(
            service="s3",
            integration_http_method="PUT",
            path="{bucket}/{object}",
            options=put_s3_item_integration_options)

        s3_item.add_method(
            "PUT",
            put_s3_item_integration,
            authorization_type=apigw.AuthorizationType.IAM,
            api_key_required=False,
            method_responses=[
                apigw.MethodResponse(
                    status_code="200",
                    response_parameters={
                        'method.response.header.Content-Type': False
                    },
                    response_models={'application/json': apigw.EmptyModel()}),
                apigw.MethodResponse(status_code="400"),
                apigw.MethodResponse(status_code="500")
            ],
            request_parameters={
                'method.request.header.Content-Type': False,
                'method.request.path.folder': True,
                'method.request.path.item': True
            })

        ddb_table = dynamodb.Table(
            self,
            "BizcardImageMetaInfoDdbTable",
            table_name="OctemberBizcardImgMeta",
            partition_key=dynamodb.Attribute(
                name="image_id", type=dynamodb.AttributeType.STRING),
            billing_mode=dynamodb.BillingMode.PROVISIONED,
            read_capacity=15,
            write_capacity=5)

        img_kinesis_stream = kinesis.Stream(
            self, "BizcardImagePath", stream_name="octember-bizcard-image")

        # create lambda function
        trigger_textract_lambda_fn = _lambda.Function(
            self,
            "TriggerTextExtractorFromImage",
            runtime=_lambda.Runtime.PYTHON_3_7,
            function_name="TriggerTextExtractorFromImage",
            handler="trigger_text_extract_from_s3_image.lambda_handler",
            description="Trigger to extract text from an image in S3",
            code=_lambda.Code.asset(
                "./src/main/python/TriggerTextExtractFromS3Image"),
            environment={
                'REGION_NAME': core.Aws.REGION,
                'DDB_TABLE_NAME': ddb_table.table_name,
                'KINESIS_STREAM_NAME': img_kinesis_stream.stream_name
            },
            timeout=core.Duration.minutes(5))

        ddb_table_rw_policy_statement = aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            resources=[ddb_table.table_arn],
            actions=[
                "dynamodb:BatchGetItem", "dynamodb:Describe*",
                "dynamodb:List*", "dynamodb:GetItem", "dynamodb:Query",
                "dynamodb:Scan", "dynamodb:BatchWriteItem",
                "dynamodb:DeleteItem", "dynamodb:PutItem",
                "dynamodb:UpdateItem", "dax:Describe*", "dax:List*",
                "dax:GetItem", "dax:BatchGetItem", "dax:Query", "dax:Scan",
                "dax:BatchWriteItem", "dax:DeleteItem", "dax:PutItem",
                "dax:UpdateItem"
            ])

        trigger_textract_lambda_fn.add_to_role_policy(
            ddb_table_rw_policy_statement)
        trigger_textract_lambda_fn.add_to_role_policy(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=[img_kinesis_stream.stream_arn],
                                    actions=[
                                        "kinesis:Get*", "kinesis:List*",
                                        "kinesis:Describe*",
                                        "kinesis:PutRecord",
                                        "kinesis:PutRecords"
                                    ]))

        # register an S3 event notification (e.g. OBJECT_CREATED) as the Lambda trigger
        s3_event_filter = s3.NotificationKeyFilter(prefix="bizcard-raw-img/",
                                                   suffix=".jpg")
        s3_event_source = S3EventSource(s3_bucket,
                                        events=[s3.EventType.OBJECT_CREATED],
                                        filters=[s3_event_filter])
        trigger_textract_lambda_fn.add_event_source(s3_event_source)

        #XXX: https://github.com/aws/aws-cdk/issues/2240
        # Create the log group explicitly to avoid extra Lambda functions with names like
        # LogRetentionaae0aa3c5b4d4f87b02d85b201efdd8a that get created when
        # log_retention=aws_logs.RetentionDays.THREE_DAYS is passed in the constructor props
        log_group = aws_logs.LogGroup(
            self,
            "TriggerTextractLogGroup",
            log_group_name="/aws/lambda/TriggerTextExtractorFromImage",
            retention=aws_logs.RetentionDays.THREE_DAYS)
        log_group.grant_write(trigger_textract_lambda_fn)

        text_kinesis_stream = kinesis.Stream(
            self, "BizcardTextData", stream_name="octember-bizcard-txt")

        textract_lambda_fn = _lambda.Function(
            self,
            "GetTextFromImage",
            runtime=_lambda.Runtime.PYTHON_3_7,
            function_name="GetTextFromImage",
            handler="get_text_from_s3_image.lambda_handler",
            description="extract text from an image in S3",
            code=_lambda.Code.asset("./src/main/python/GetTextFromS3Image"),
            environment={
                'REGION_NAME': core.Aws.REGION,
                'DDB_TABLE_NAME': ddb_table.table_name,
                'KINESIS_STREAM_NAME': text_kinesis_stream.stream_name
            },
            timeout=core.Duration.minutes(5))

        textract_lambda_fn.add_to_role_policy(ddb_table_rw_policy_statement)
        textract_lambda_fn.add_to_role_policy(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=[text_kinesis_stream.stream_arn],
                                    actions=[
                                        "kinesis:Get*", "kinesis:List*",
                                        "kinesis:Describe*",
                                        "kinesis:PutRecord",
                                        "kinesis:PutRecords"
                                    ]))

        textract_lambda_fn.add_to_role_policy(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    "resources": [
                        s3_bucket.bucket_arn, "{}/*".format(
                            s3_bucket.bucket_arn)
                    ],
                    "actions": [
                        "s3:AbortMultipartUpload", "s3:GetBucketLocation",
                        "s3:GetObject", "s3:ListBucket",
                        "s3:ListBucketMultipartUploads", "s3:PutObject"
                    ]
                }))

        textract_lambda_fn.add_to_role_policy(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=["*"],
                                    actions=["textract:*"]))

        img_kinesis_event_source = KinesisEventSource(
            img_kinesis_stream,
            batch_size=100,
            starting_position=_lambda.StartingPosition.LATEST)
        textract_lambda_fn.add_event_source(img_kinesis_event_source)

        log_group = aws_logs.LogGroup(
            self,
            "GetTextFromImageLogGroup",
            log_group_name="/aws/lambda/GetTextFromImage",
            retention=aws_logs.RetentionDays.THREE_DAYS)
        log_group.grant_write(textract_lambda_fn)

        sg_use_bizcard_es = aws_ec2.SecurityGroup(
            self,
            "BizcardSearchClientSG",
            vpc=vpc,
            allow_all_outbound=True,
            description=
            'security group for octember bizcard elasticsearch client',
            security_group_name='use-octember-bizcard-es')
        core.Tags.of(sg_use_bizcard_es).add('Name', 'use-octember-bizcard-es')

        sg_bizcard_es = aws_ec2.SecurityGroup(
            self,
            "BizcardSearchSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for octember bizcard elasticsearch',
            security_group_name='octember-bizcard-es')
        core.Tags.of(sg_bizcard_es).add('Name', 'octember-bizcard-es')

        sg_bizcard_es.add_ingress_rule(peer=sg_bizcard_es,
                                       connection=aws_ec2.Port.all_tcp(),
                                       description='octember-bizcard-es')
        sg_bizcard_es.add_ingress_rule(peer=sg_use_bizcard_es,
                                       connection=aws_ec2.Port.all_tcp(),
                                       description='use-octember-bizcard-es')

        sg_ssh_access = aws_ec2.SecurityGroup(
            self,
            "BastionHostSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for bastion host',
            security_group_name='octember-bastion-host-sg')
        core.Tags.of(sg_ssh_access).add('Name', 'octember-bastion-host')
        sg_ssh_access.add_ingress_rule(peer=aws_ec2.Peer.any_ipv4(),
                                       connection=aws_ec2.Port.tcp(22),
                                       description='ssh access')

        bastion_host = aws_ec2.BastionHostLinux(
            self,
            "BastionHost",
            vpc=vpc,
            instance_type=aws_ec2.InstanceType('t3.nano'),
            security_group=sg_ssh_access,
            subnet_selection=aws_ec2.SubnetSelection(
                subnet_type=aws_ec2.SubnetType.PUBLIC))
        bastion_host.instance.add_security_group(sg_use_bizcard_es)

        #XXX: aws cdk elasticsearch example - https://github.com/aws/aws-cdk/issues/2873
        es_cfn_domain = aws_elasticsearch.CfnDomain(
            self,
            'BizcardSearch',
            elasticsearch_cluster_config={
                "dedicatedMasterCount": 3,
                "dedicatedMasterEnabled": True,
                "dedicatedMasterType": "t2.medium.elasticsearch",
                "instanceCount": 2,
                "instanceType": "t2.medium.elasticsearch",
                "zoneAwarenessEnabled": True
            },
            ebs_options={
                "ebsEnabled": True,
                "volumeSize": 10,
                "volumeType": "gp2"
            },
            domain_name="octember-bizcard",
            elasticsearch_version="7.9",
            encryption_at_rest_options={"enabled": False},
            access_policies={
                "Version":
                "2012-10-17",
                "Statement": [{
                    "Effect":
                    "Allow",
                    "Principal": {
                        "AWS": "*"
                    },
                    "Action":
                    ["es:Describe*", "es:List*", "es:Get*", "es:ESHttp*"],
                    "Resource":
                    self.format_arn(service="es",
                                    resource="domain",
                                    resource_name="octember-bizcard/*")
                }]
            },
            snapshot_options={"automatedSnapshotStartHour": 17},
            vpc_options={
                "securityGroupIds": [sg_bizcard_es.security_group_id],
                "subnetIds":
                vpc.select_subnets(
                    subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids
            })
        core.Tags.of(es_cfn_domain).add('Name', 'octember-bizcard-es')

        s3_lib_bucket_name = self.node.try_get_context("lib_bucket_name")

        #XXX: https://github.com/aws/aws-cdk/issues/1342
        s3_lib_bucket = s3.Bucket.from_bucket_name(self, id,
                                                   s3_lib_bucket_name)
        es_lib_layer = _lambda.LayerVersion(
            self,
            "ESLib",
            layer_version_name="es-lib",
            compatible_runtimes=[_lambda.Runtime.PYTHON_3_7],
            code=_lambda.Code.from_bucket(s3_lib_bucket,
                                          "var/octember-es-lib.zip"))

        redis_lib_layer = _lambda.LayerVersion(
            self,
            "RedisLib",
            layer_version_name="redis-lib",
            compatible_runtimes=[_lambda.Runtime.PYTHON_3_7],
            code=_lambda.Code.from_bucket(s3_lib_bucket,
                                          "var/octember-redis-lib.zip"))

        #XXX: Deploy lambda in VPC - https://github.com/aws/aws-cdk/issues/1342
        upsert_to_es_lambda_fn = _lambda.Function(
            self,
            "UpsertBizcardToES",
            runtime=_lambda.Runtime.PYTHON_3_7,
            function_name="UpsertBizcardToElasticSearch",
            handler="upsert_bizcard_to_es.lambda_handler",
            description="Upsert bizcard text into elasticsearch",
            code=_lambda.Code.asset("./src/main/python/UpsertBizcardToES"),
            environment={
                'ES_HOST': es_cfn_domain.attr_domain_endpoint,
                'ES_INDEX': 'octember_bizcard',
                'ES_TYPE': 'bizcard'
            },
            timeout=core.Duration.minutes(5),
            layers=[es_lib_layer],
            security_groups=[sg_use_bizcard_es],
            vpc=vpc)

        text_kinesis_event_source = KinesisEventSource(
            text_kinesis_stream,
            batch_size=99,
            starting_position=_lambda.StartingPosition.LATEST)
        upsert_to_es_lambda_fn.add_event_source(text_kinesis_event_source)

        log_group = aws_logs.LogGroup(
            self,
            "UpsertBizcardToESLogGroup",
            log_group_name="/aws/lambda/UpsertBizcardToElasticSearch",
            retention=aws_logs.RetentionDays.THREE_DAYS)
        log_group.grant_write(upsert_to_es_lambda_fn)

        firehose_role_policy_doc = aws_iam.PolicyDocument()
        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    "resources": [
                        s3_bucket.bucket_arn, "{}/*".format(
                            s3_bucket.bucket_arn)
                    ],
                    "actions": [
                        "s3:AbortMultipartUpload", "s3:GetBucketLocation",
                        "s3:GetObject", "s3:ListBucket",
                        "s3:ListBucketMultipartUploads", "s3:PutObject"
                    ]
                }))

        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=["*"],
                                    actions=[
                                        "glue:GetTable",
                                        "glue:GetTableVersion",
                                        "glue:GetTableVersions"
                                    ]))

        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=[text_kinesis_stream.stream_arn],
                                    actions=[
                                        "kinesis:DescribeStream",
                                        "kinesis:GetShardIterator",
                                        "kinesis:GetRecords"
                                    ]))

        firehose_log_group_name = "/aws/kinesisfirehose/octember-bizcard-txt-to-s3"
        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                effect=aws_iam.Effect.ALLOW,
                #XXX: The ARN will be formatted as follows:
                # arn:{partition}:{service}:{region}:{account}:{resource}{sep}{resource-name}
                resources=[
                    self.format_arn(service="logs",
                                    resource="log-group",
                                    resource_name="{}:log-stream:*".format(
                                        firehose_log_group_name),
                                    sep=":")
                ],
                actions=["logs:PutLogEvents"]))

        firehose_role = aws_iam.Role(
            self,
            "FirehoseDeliveryRole",
            role_name="FirehoseDeliveryRole",
            assumed_by=aws_iam.ServicePrincipal("firehose.amazonaws.com"),
            #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
            inline_policies={"firehose_role_policy": firehose_role_policy_doc})

        bizcard_text_to_s3_delivery_stream = aws_kinesisfirehose.CfnDeliveryStream(
            self,
            "BizcardTextToS3",
            delivery_stream_name="octember-bizcard-txt-to-s3",
            delivery_stream_type="KinesisStreamAsSource",
            kinesis_stream_source_configuration={
                "kinesisStreamArn": text_kinesis_stream.stream_arn,
                "roleArn": firehose_role.role_arn
            },
            extended_s3_destination_configuration={
                "bucketArn": s3_bucket.bucket_arn,
                "bufferingHints": {
                    "intervalInSeconds": 60,
                    "sizeInMBs": 1
                },
                "cloudWatchLoggingOptions": {
                    "enabled": True,
                    "logGroupName": firehose_log_group_name,
                    "logStreamName": "S3Delivery"
                },
                "compressionFormat": "GZIP",
                "prefix": "bizcard-text/",
                "roleArn": firehose_role.role_arn
            })

        sg_use_bizcard_es_cache = aws_ec2.SecurityGroup(
            self,
            "BizcardSearchCacheClientSG",
            vpc=vpc,
            allow_all_outbound=True,
            description=
            'security group for octember bizcard search query cache client',
            security_group_name='use-octember-bizcard-es-cache')
        core.Tags.of(sg_use_bizcard_es_cache).add(
            'Name', 'use-octember-bizcard-es-cache')

        sg_bizcard_es_cache = aws_ec2.SecurityGroup(
            self,
            "BizcardSearchCacheSG",
            vpc=vpc,
            allow_all_outbound=True,
            description=
            'security group for octember bizcard search query cache',
            security_group_name='octember-bizcard-es-cache')
        core.Tags.of(sg_bizcard_es_cache).add('Name',
                                              'octember-bizcard-es-cache')

        sg_bizcard_es_cache.add_ingress_rule(
            peer=sg_use_bizcard_es_cache,
            connection=aws_ec2.Port.tcp(6379),
            description='use-octember-bizcard-es-cache')

        es_query_cache_subnet_group = aws_elasticache.CfnSubnetGroup(
            self,
            "QueryCacheSubnetGroup",
            description="subnet group for octember-bizcard-es-cache",
            subnet_ids=vpc.select_subnets(
                subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids,
            cache_subnet_group_name='octember-bizcard-es-cache')

        es_query_cache = aws_elasticache.CfnCacheCluster(
            self,
            "BizcardSearchQueryCache",
            cache_node_type="cache.t3.small",
            num_cache_nodes=1,
            engine="redis",
            engine_version="5.0.5",
            auto_minor_version_upgrade=False,
            cluster_name="octember-bizcard-es-cache",
            snapshot_retention_limit=3,
            snapshot_window="17:00-19:00",
            preferred_maintenance_window="mon:19:00-mon:20:30",
            #XXX: Do not use a reference for "cache_subnet_group_name" - https://github.com/aws/aws-cdk/issues/3098
            #cache_subnet_group_name=es_query_cache_subnet_group.cache_subnet_group_name, # Redis cluster goes to wrong VPC
            cache_subnet_group_name='octember-bizcard-es-cache',
            vpc_security_group_ids=[sg_bizcard_es_cache.security_group_id])

        #XXX: If you're going to launch your cluster in an Amazon VPC, you need to create a subnet group before you start creating a cluster.
        # https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-elasticache-cache-cluster.html#cfn-elasticache-cachecluster-cachesubnetgroupname
        es_query_cache.add_depends_on(es_query_cache_subnet_group)

        #XXX: add more than 2 security groups
        # https://github.com/aws/aws-cdk/blob/ea10f0d141a48819ec0000cd7905feda993870a9/packages/%40aws-cdk/aws-lambda/lib/function.ts#L387
        # https://github.com/aws/aws-cdk/issues/1555
        # https://github.com/aws/aws-cdk/pull/5049
        bizcard_search_lambda_fn = _lambda.Function(
            self,
            "BizcardSearchServer",
            runtime=_lambda.Runtime.PYTHON_3_7,
            function_name="BizcardSearchProxy",
            handler="es_search_bizcard.lambda_handler",
            description="Proxy server to search bizcard text",
            code=_lambda.Code.asset("./src/main/python/SearchBizcard"),
            environment={
                'ES_HOST': es_cfn_domain.attr_domain_endpoint,
                'ES_INDEX': 'octember_bizcard',
                'ES_TYPE': 'bizcard',
                'ELASTICACHE_HOST': es_query_cache.attr_redis_endpoint_address
            },
            timeout=core.Duration.minutes(1),
            layers=[es_lib_layer, redis_lib_layer],
            security_groups=[sg_use_bizcard_es, sg_use_bizcard_es_cache],
            vpc=vpc)

        #XXX: create API Gateway + LambdaProxy
        search_api = apigw.LambdaRestApi(
            self,
            "BizcardSearchAPI",
            handler=bizcard_search_lambda_fn,
            proxy=False,
            rest_api_name="BizcardSearch",
            description="This service serves searching bizcard text.",
            endpoint_types=[apigw.EndpointType.REGIONAL],
            deploy=True,
            deploy_options=apigw.StageOptions(stage_name="v1"))

        bizcard_search = search_api.root.add_resource('search')
        bizcard_search.add_method(
            "GET",
            method_responses=[
                apigw.MethodResponse(
                    status_code="200",
                    response_models={'application/json': apigw.EmptyModel()}),
                apigw.MethodResponse(status_code="400"),
                apigw.MethodResponse(status_code="500")
            ])

        sg_use_bizcard_graph_db = aws_ec2.SecurityGroup(
            self,
            "BizcardGraphDbClientSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for octember bizcard graph db client',
            security_group_name='use-octember-bizcard-neptune')
        core.Tags.of(sg_use_bizcard_graph_db).add(
            'Name', 'use-octember-bizcard-neptune')

        sg_bizcard_graph_db = aws_ec2.SecurityGroup(
            self,
            "BizcardGraphDbSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for octember bizcard graph db',
            security_group_name='octember-bizcard-neptune')
        core.Tags.of(sg_bizcard_graph_db).add('Name',
                                              'octember-bizcard-neptune')

        sg_bizcard_graph_db.add_ingress_rule(
            peer=sg_bizcard_graph_db,
            connection=aws_ec2.Port.tcp(8182),
            description='octember-bizcard-neptune')
        sg_bizcard_graph_db.add_ingress_rule(
            peer=sg_use_bizcard_graph_db,
            connection=aws_ec2.Port.tcp(8182),
            description='use-octember-bizcard-neptune')

        bizcard_graph_db_subnet_group = aws_neptune.CfnDBSubnetGroup(
            self,
            "NeptuneSubnetGroup",
            db_subnet_group_description=
            "subnet group for octember-bizcard-neptune",
            subnet_ids=vpc.select_subnets(
                subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids,
            db_subnet_group_name='octember-bizcard-neptune')

        bizcard_graph_db = aws_neptune.CfnDBCluster(
            self,
            "BizcardGraphDB",
            availability_zones=vpc.availability_zones,
            db_subnet_group_name=bizcard_graph_db_subnet_group.
            db_subnet_group_name,
            db_cluster_identifier="octember-bizcard",
            backup_retention_period=1,
            preferred_backup_window="08:45-09:15",
            preferred_maintenance_window="sun:18:00-sun:18:30",
            vpc_security_group_ids=[sg_bizcard_graph_db.security_group_id])
        bizcard_graph_db.add_depends_on(bizcard_graph_db_subnet_group)

        bizcard_graph_db_instance = aws_neptune.CfnDBInstance(
            self,
            "BizcardGraphDBInstance",
            db_instance_class="db.r5.large",
            allow_major_version_upgrade=False,
            auto_minor_version_upgrade=False,
            availability_zone=vpc.availability_zones[0],
            db_cluster_identifier=bizcard_graph_db.db_cluster_identifier,
            db_instance_identifier="octember-bizcard",
            preferred_maintenance_window="sun:18:00-sun:18:30")
        bizcard_graph_db_instance.add_depends_on(bizcard_graph_db)

        bizcard_graph_db_replica_instance = aws_neptune.CfnDBInstance(
            self,
            "BizcardGraphDBReplicaInstance",
            db_instance_class="db.r5.large",
            allow_major_version_upgrade=False,
            auto_minor_version_upgrade=False,
            availability_zone=vpc.availability_zones[-1],
            db_cluster_identifier=bizcard_graph_db.db_cluster_identifier,
            db_instance_identifier="octember-bizcard-replica",
            preferred_maintenance_window="sun:18:00-sun:18:30")
        bizcard_graph_db_replica_instance.add_depends_on(bizcard_graph_db)
        bizcard_graph_db_replica_instance.add_depends_on(
            bizcard_graph_db_instance)

        gremlinpython_lib_layer = _lambda.LayerVersion(
            self,
            "GremlinPythonLib",
            layer_version_name="gremlinpython-lib",
            compatible_runtimes=[_lambda.Runtime.PYTHON_3_7],
            code=_lambda.Code.from_bucket(
                s3_lib_bucket, "var/octember-gremlinpython-lib.zip"))

        #XXX: https://github.com/aws/aws-cdk/issues/1342
        upsert_to_neptune_lambda_fn = _lambda.Function(
            self,
            "UpsertBizcardToGraphDB",
            runtime=_lambda.Runtime.PYTHON_3_7,
            function_name="UpsertBizcardToNeptune",
            handler="upsert_bizcard_to_graph_db.lambda_handler",
            description="Upsert bizcard into neptune",
            code=_lambda.Code.asset(
                "./src/main/python/UpsertBizcardToGraphDB"),
            environment={
                'REGION_NAME': core.Aws.REGION,
                'NEPTUNE_ENDPOINT': bizcard_graph_db.attr_endpoint,
                'NEPTUNE_PORT': bizcard_graph_db.attr_port
            },
            timeout=core.Duration.minutes(5),
            layers=[gremlinpython_lib_layer],
            security_groups=[sg_use_bizcard_graph_db],
            vpc=vpc)

        upsert_to_neptune_lambda_fn.add_event_source(text_kinesis_event_source)

        log_group = aws_logs.LogGroup(
            self,
            "UpsertBizcardToGraphDBLogGroup",
            log_group_name="/aws/lambda/UpsertBizcardToNeptune",
            retention=aws_logs.RetentionDays.THREE_DAYS)
        log_group.grant_write(upsert_to_neptune_lambda_fn)

        sg_use_bizcard_neptune_cache = aws_ec2.SecurityGroup(
            self,
            "BizcardNeptuneCacheClientSG",
            vpc=vpc,
            allow_all_outbound=True,
            description=
            'security group for octember bizcard recommendation query cache client',
            security_group_name='use-octember-bizcard-neptune-cache')
        core.Tags.of(sg_use_bizcard_neptune_cache).add(
            'Name', 'use-octember-bizcard-neptune-cache')

        sg_bizcard_neptune_cache = aws_ec2.SecurityGroup(
            self,
            "BizcardNeptuneCacheSG",
            vpc=vpc,
            allow_all_outbound=True,
            description=
            'security group for octember bizcard recommendation query cache',
            security_group_name='octember-bizcard-neptune-cache')
        core.Tags.of(sg_bizcard_neptune_cache).add(
            'Name', 'octember-bizcard-neptune-cache')

        sg_bizcard_neptune_cache.add_ingress_rule(
            peer=sg_use_bizcard_neptune_cache,
            connection=aws_ec2.Port.tcp(6379),
            description='use-octember-bizcard-neptune-cache')

        recomm_query_cache_subnet_group = aws_elasticache.CfnSubnetGroup(
            self,
            "RecommQueryCacheSubnetGroup",
            description="subnet group for octember-bizcard-neptune-cache",
            subnet_ids=vpc.select_subnets(
                subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids,
            cache_subnet_group_name='octember-bizcard-neptune-cache')

        recomm_query_cache = aws_elasticache.CfnCacheCluster(
            self,
            "BizcardRecommQueryCache",
            cache_node_type="cache.t3.small",
            num_cache_nodes=1,
            engine="redis",
            engine_version="5.0.5",
            auto_minor_version_upgrade=False,
            cluster_name="octember-bizcard-neptune-cache",
            snapshot_retention_limit=3,
            snapshot_window="17:00-19:00",
            preferred_maintenance_window="mon:19:00-mon:20:30",
            #XXX: Do not use a reference for "cache_subnet_group_name" - https://github.com/aws/aws-cdk/issues/3098
            #cache_subnet_group_name=recomm_query_cache_subnet_group.cache_subnet_group_name, # Redis cluster goes to wrong VPC
            cache_subnet_group_name='octember-bizcard-neptune-cache',
            vpc_security_group_ids=[
                sg_bizcard_neptune_cache.security_group_id
            ])

        recomm_query_cache.add_depends_on(recomm_query_cache_subnet_group)

        bizcard_recomm_lambda_fn = _lambda.Function(
            self,
            "BizcardRecommender",
            runtime=_lambda.Runtime.PYTHON_3_7,
            function_name="BizcardRecommender",
            handler="neptune_recommend_bizcard.lambda_handler",
            description="This service serves PYMK(People You May Know).",
            code=_lambda.Code.asset("./src/main/python/RecommendBizcard"),
            environment={
                'REGION_NAME': core.Aws.REGION,
                'NEPTUNE_ENDPOINT': bizcard_graph_db.attr_read_endpoint,
                'NEPTUNE_PORT': bizcard_graph_db.attr_port,
                'ELASTICACHE_HOST':
                recomm_query_cache.attr_redis_endpoint_address
            },
            timeout=core.Duration.minutes(1),
            layers=[gremlinpython_lib_layer, redis_lib_layer],
            security_groups=[
                sg_use_bizcard_graph_db, sg_use_bizcard_neptune_cache
            ],
            vpc=vpc)

        #XXX: create API Gateway + LambdaProxy
        recomm_api = apigw.LambdaRestApi(
            self,
            "BizcardRecommendAPI",
            handler=bizcard_recomm_lambda_fn,
            proxy=False,
            rest_api_name="BizcardRecommend",
            description="This service serves PYMK(People You May Know).",
            endpoint_types=[apigw.EndpointType.REGIONAL],
            deploy=True,
            deploy_options=apigw.StageOptions(stage_name="v1"))

        bizcard_recomm = recomm_api.root.add_resource('pymk')
        bizcard_recomm.add_method(
            "GET",
            method_responses=[
                apigw.MethodResponse(
                    status_code="200",
                    response_models={'application/json': apigw.EmptyModel()}),
                apigw.MethodResponse(status_code="400"),
                apigw.MethodResponse(status_code="500")
            ])

        sagemaker_notebook_role_policy_doc = aws_iam.PolicyDocument()
        sagemaker_notebook_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    "resources": [
                        "arn:aws:s3:::aws-neptune-notebook",
                        "arn:aws:s3:::aws-neptune-notebook/*"
                    ],
                    "actions": ["s3:GetObject", "s3:ListBucket"]
                }))

        sagemaker_notebook_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    "resources": [
                        "arn:aws:neptune-db:{region}:{account}:{cluster_id}/*".
                        format(region=core.Aws.REGION,
                               account=core.Aws.ACCOUNT_ID,
                               cluster_id=bizcard_graph_db.
                               attr_cluster_resource_id)
                    ],
                    "actions": ["neptune-db:connect"]
                }))

        sagemaker_notebook_role = aws_iam.Role(
            self,
            'SageMakerNotebookForNeptuneWorkbenchRole',
            role_name='AWSNeptuneNotebookRole-OctemberBizcard',
            assumed_by=aws_iam.ServicePrincipal('sagemaker.amazonaws.com'),
            #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
            inline_policies={
                'AWSNeptuneNotebook': sagemaker_notebook_role_policy_doc
            })

        neptune_wb_lifecycle_content = '''#!/bin/bash
sudo -u ec2-user -i <<'EOF'
echo "export GRAPH_NOTEBOOK_AUTH_MODE=DEFAULT" >> ~/.bashrc
echo "export GRAPH_NOTEBOOK_HOST={NeptuneClusterEndpoint}" >> ~/.bashrc
echo "export GRAPH_NOTEBOOK_PORT={NeptuneClusterPort}" >> ~/.bashrc
echo "export NEPTUNE_LOAD_FROM_S3_ROLE_ARN=''" >> ~/.bashrc
echo "export AWS_REGION={AWS_Region}" >> ~/.bashrc
aws s3 cp s3://aws-neptune-notebook/graph_notebook.tar.gz /tmp/graph_notebook.tar.gz
rm -rf /tmp/graph_notebook
tar -zxvf /tmp/graph_notebook.tar.gz -C /tmp
/tmp/graph_notebook/install.sh
EOF
'''.format(NeptuneClusterEndpoint=bizcard_graph_db.attr_endpoint,
           NeptuneClusterPort=bizcard_graph_db.attr_port,
           AWS_Region=core.Aws.REGION)

        neptune_wb_lifecycle_config_prop = aws_sagemaker.CfnNotebookInstanceLifecycleConfig.NotebookInstanceLifecycleHookProperty(
            content=core.Fn.base64(neptune_wb_lifecycle_content))

        neptune_wb_lifecycle_config = aws_sagemaker.CfnNotebookInstanceLifecycleConfig(
            self,
            'NeptuneWorkbenchLifeCycleConfig',
            notebook_instance_lifecycle_config_name=
            'AWSNeptuneWorkbenchOctemberBizcardLCConfig',
            on_start=[neptune_wb_lifecycle_config_prop])

        neptune_workbench = aws_sagemaker.CfnNotebookInstance(
            self,
            'NeptuneWorkbench',
            instance_type='ml.t2.medium',
            role_arn=sagemaker_notebook_role.role_arn,
            lifecycle_config_name=neptune_wb_lifecycle_config.
            notebook_instance_lifecycle_config_name,
            notebook_instance_name='OctemberBizcard-NeptuneWorkbench',
            root_access='Disabled',
            security_group_ids=[sg_use_bizcard_graph_db.security_group_id],
            subnet_id=bizcard_graph_db_subnet_group.subnet_ids[0])
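
A hypothetical sketch (the real handlers live under ./src/main/python and are not shown here): the Kinesis-triggered functions above (GetTextFromImage, UpsertBizcardToElasticSearch, UpsertBizcardToNeptune) all start by decoding the base64-encoded Kinesis records before doing their service-specific work.

import base64
import json


def lambda_handler(event, context):
    for record in event.get("Records", []):
        # Each Kinesis record carries a base64-encoded JSON payload.
        payload = json.loads(base64.b64decode(record["kinesis"]["data"]))
        # ... service-specific work (Textract call, Elasticsearch upsert, Neptune upsert) goes here
        print(payload)
    return {"statusCode": 200}
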
Example #18
    def __init__(self, scope: core.Construct, construct_id: str,
                 **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        # The code that defines your stack goes here
        # VPC
        self.vpc = ec2.Vpc(
            self,
            "VPC",
            max_azs=2,
            cidr="10.10.0.0/16",
            subnet_configuration=[
                ec2.SubnetConfiguration(subnet_type=ec2.SubnetType.PUBLIC,
                                        name="Public",
                                        cidr_mask=24),
                ec2.SubnetConfiguration(subnet_type=ec2.SubnetType.PRIVATE,
                                        name="Private",
                                        cidr_mask=24)
            ],
            nat_gateways=1)

        # Security group
        # self.sg = ec2.SecurityGroup(self, "securityGroup", self.vpc)
        self.sg = ec2.SecurityGroup.from_security_group_id(
            self,
            "securityGroup",
            self.vpc.vpc_default_security_group,
            mutable=False)

        # Create EFS inside VPC
        self.efs = efs.FileSystem(
            self,
            "commonEFS4Notebooks",
            vpc=self.vpc,
            encrypted=True,
            enable_automatic_backups=True,
            performance_mode=efs.PerformanceMode('MAX_IO'),
            throughput_mode=efs.ThroughputMode('BURSTING'),
            security_group=self.sg)

        # Mount target for EFS
        # self.mount = efs.CfnMountTarget(
        #     self,
        #     "MountID",
        #     file_system_id=self.efs.file_system_id,security_groups=[self.sg.security_group_id,],
        #     subnet_id=self.vpc.private_subnets[0].subnet_id,
        #     )

        # IAM Roles
        #Create role for Notebook instance
        nRole = iam_.Role(self,
                          "notebookAccessRole",
                          assumed_by=iam_.ServicePrincipal('sagemaker.amazonaws.com'))

        nPolicy = iam_.Policy(self,
                              "notebookAccessPolicy",
                              policy_name="notebookAccessPolicy",
                              statements=[
                                  iam_.PolicyStatement(actions=[
                                      's3:*',
                                  ],
                                                       resources=[
                                                           '*',
                                                       ]),
                              ]).attach_to_role(nRole)

        #Create notebook instances cluster

        # print(self.mount.get_att('attr_ip_address').to_string())
        encodedScript = LifecycleScriptStr.format(self.efs.file_system_id)
        # print("Adding following script to the lifecycle config..\n___\n\n"+encodedScript)

        code = [{"content": core.Fn.base64(encodedScript)}]

        lifecycleconfig = sm.CfnNotebookInstanceLifecycleConfig(
            self,
            "LifeCycleConfig",
            notebook_instance_lifecycle_config_name=LifeCycleConfigName,
            on_create=None,
            on_start=code)

        instances = []
        for i in range(num_instances):
            nid = 'CDK-Notebook-Instance-User-' + str(i)
            instances.append(
                sm.CfnNotebookInstance(
                    self,
                    nid,
                    instance_type='ml.t2.medium',
                    volume_size_in_gb=5,
                    security_group_ids=[self.sg.security_group_id],
                    subnet_id=self.vpc.private_subnets[0].subnet_id,
                    notebook_instance_name=nid,
                    role_arn=nRole.role_arn,
                    lifecycle_config_name=lifecycleconfig.
                    notebook_instance_lifecycle_config_name))

        core.CfnOutput(self, "VPC_id", value=self.vpc.vpc_id)
        core.CfnOutput(self, "EFS_id", value=self.efs.file_system_id)
        [
            core.CfnOutput(self,
                           "NotebookInstance_" + str(c),
                           value=notebook.notebook_instance_name)
            for c, notebook in enumerate(instances)
        ]
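
The example above references LifecycleScriptStr, LifeCycleConfigName and num_instances without defining them; a hypothetical set of module-level definitions is sketched below (the values, mount options and region are assumptions, not from the original).

# Hypothetical module-level constants used by the stack above.
num_instances = 2
LifeCycleConfigName = "CommonEFSMountConfig"

# On-start script that mounts the shared EFS volume; "{0}" is filled with the
# file system id by LifecycleScriptStr.format(...). Replace us-east-1 with the
# region the stack is deployed to.
LifecycleScriptStr = """#!/bin/bash
set -e
mkdir -p /home/ec2-user/SageMaker/efs
sudo mount -t nfs4 -o nfsvers=4.1 {0}.efs.us-east-1.amazonaws.com:/ /home/ec2-user/SageMaker/efs
sudo chown ec2-user:ec2-user /home/ec2-user/SageMaker/efs
"""
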
Example #19
    def __init__(self, scope: core.Construct, construct_id: str,
                 **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        # The code that defines your stack goes here
        vpc_name = self.node.try_get_context('vpc_name')
        vpc = aws_ec2.Vpc.from_lookup(self,
                                      'ExistingVPC',
                                      is_default=True,
                                      vpc_name=vpc_name)

        sg_use_docdb = aws_ec2.SecurityGroup(
            self,
            'DocDBClientSG',
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for documentdb client',
            security_group_name='use-docdb-sg')
        core.Tags.of(sg_use_docdb).add('Name', 'docdb-client-sg')

        sg_docdb_server = aws_ec2.SecurityGroup(
            self,
            'DocDBServerSG',
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for documentdb',
            security_group_name='{stack_name}-server-sg-docdb'.format(
                stack_name=self.stack_name))
        sg_docdb_server.add_ingress_rule(peer=sg_use_docdb,
                                         connection=aws_ec2.Port.tcp(27017),
                                         description='docdb-client-sg')
        core.Tags.of(sg_docdb_server).add('Name', 'docdb-server-sg')

        docdb_cluster_name = self.node.try_get_context('docdb_cluster_name')
        docdb_cluster_name = docdb_cluster_name if docdb_cluster_name else self.stack_name
        docdb_cluster = aws_docdb.DatabaseCluster(
            self,
            'DocDB',
            db_cluster_name=docdb_cluster_name,
            master_user=aws_docdb.Login(username='******'),
            instance_props={
                # 'instance_type': aws_ec2.InstanceType('r5.xlarge'),
                'instance_type': aws_ec2.InstanceType.of(
                    aws_ec2.InstanceClass.MEMORY5, aws_ec2.InstanceSize.LARGE),
                'vpc_subnets': {
                    'subnet_type': aws_ec2.SubnetType.PRIVATE
                },
                'vpc': vpc,
                'security_group': sg_docdb_server
            },
            instances=3,
            preferred_maintenance_window='sun:18:00-sun:18:30',
            removal_policy=core.RemovalPolicy.RETAIN)

        #[Warning at /docdb-sm/Database/RotationSingleUser/SecurityGroup] Ignoring Egress rule since 'allowAllOutbound' is set to true; To add customize rules, set allowAllOutbound=false on the SecurityGroup
        #docdb_cluster.add_rotation_single_user()

        sagemaker_notebook_role_policy_doc = aws_iam.PolicyDocument()
        sagemaker_notebook_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                effect=aws_iam.Effect.ALLOW,
                resources=[docdb_cluster.secret.secret_full_arn],
                actions=["secretsmanager:GetSecretValue"]))

        sagemaker_notebook_role = aws_iam.Role(
            self,
            'SageMakerNotebookRoleForDocDB',
            role_name='AWSSageMakerNotebookRoleForDocDB',
            assumed_by=aws_iam.ServicePrincipal('sagemaker.amazonaws.com'),
            inline_policies={
                'DocumentDBSecretPolicy': sagemaker_notebook_role_policy_doc
            })

        docdb_wb_lifecycle_content = '''#!/bin/bash
sudo -u ec2-user -i <<'EOF'
echo "export AWS_REGION={AWS_Region}" >> ~/.bashrc
source /home/ec2-user/anaconda3/bin/activate python3
pip install --upgrade pymongo
pip install --upgrade xgboost
source /home/ec2-user/anaconda3/bin/deactivate
cd /home/ec2-user/SageMaker
wget https://s3.amazonaws.com/rds-downloads/rds-combined-ca-bundle.pem
wget https://raw.githubusercontent.com/aws-samples/documentdb-sagemaker-example/main/script.ipynb
EOF
'''.format(AWS_Region=core.Aws.REGION)

        docdb_wb_lifecycle_config_prop = aws_sagemaker.CfnNotebookInstanceLifecycleConfig.NotebookInstanceLifecycleHookProperty(
            content=core.Fn.base64(docdb_wb_lifecycle_content))

        docdb_wb_lifecycle_config = aws_sagemaker.CfnNotebookInstanceLifecycleConfig(
            self,
            'DocDBWorkbenchLifeCycleConfig',
            notebook_instance_lifecycle_config_name='DocDBWorkbenchLifeCycleConfig',
            on_start=[docdb_wb_lifecycle_config_prop])

        docdb_workbench = aws_sagemaker.CfnNotebookInstance(
            self,
            'DocDBWorkbench',
            instance_type='ml.t3.xlarge',
            role_arn=sagemaker_notebook_role.role_arn,
            lifecycle_config_name=docdb_wb_lifecycle_config.notebook_instance_lifecycle_config_name,
            notebook_instance_name='DocDBWorkbench',
            root_access='Disabled',
            # security_group_ids expects security group IDs, not group names
            security_group_ids=[sg_use_docdb.security_group_id],
            subnet_id=vpc.select_subnets(
                subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids[0])

        core.CfnOutput(self,
                       'StackName',
                       value=self.stack_name,
                       export_name='StackName')
        core.CfnOutput(self, 'VpcId', value=vpc.vpc_id, export_name='VpcId')

        core.CfnOutput(self,
                       'DocumentDBClusterName',
                       value=docdb_cluster.cluster_identifier,
                       export_name='DocumentDBClusterName')
        core.CfnOutput(self,
                       'DocumentDBCluster',
                       value=docdb_cluster.cluster_endpoint.socket_address,
                       export_name='DocumentDBCluster')
        #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_secretsmanager/README.html
        # secret_arn="arn:aws:secretsmanager:<region>:<account-id-number>:secret:<secret-name>-<random-6-characters>",
        core.CfnOutput(self,
                       'DocDBSecret',
                       value=docdb_cluster.secret.secret_name,
                       export_name='DocDBSecret')

        core.CfnOutput(self,
                       'SageMakerRole',
                       value=sagemaker_notebook_role.role_name,
                       export_name='SageMakerRole')
        core.CfnOutput(self,
                       'SageMakerNotebookInstance',
                       value=docdb_workbench.notebook_instance_name,
                       export_name='SageMakerNotebookInstance')
        core.CfnOutput(self,
                       'SageMakerNotebookInstanceLifecycleConfig',
                       value=docdb_workbench.lifecycle_config_name,
                       export_name='SageMakerNotebookInstanceLifecycleConfig')