Example #1
    def _generate_sam_template_with_assets(self, chalice_out_dir, package_id):
        # type: (str, str) -> str
        deployment_zip_path = os.path.join(self._sam_package_dir,
                                           'deployment.zip')
        sam_deployment_asset = assets.Asset(self,
                                            'ChaliceAppCode',
                                            path=deployment_zip_path)
        sam_template_path = os.path.join(self._sam_package_dir, 'sam.json')
        sam_template_with_assets_path = os.path.join(
            chalice_out_dir, '%s.sam_with_assets.json' % package_id)

        with open(sam_template_path) as sam_template_file:
            sam_template = json.load(sam_template_file)
            for function in self._filter_resources(
                    sam_template, 'AWS::Serverless::Function'):
                function['Properties']['CodeUri'] = {
                    'Bucket': sam_deployment_asset.s3_bucket_name,
                    'Key': sam_deployment_asset.s3_object_key
                }
            managed_layers = self._filter_resources(
                sam_template, 'AWS::Serverless::LayerVersion')
            if len(managed_layers) == 1:
                layer_filename = os.path.join(self._sam_package_dir,
                                              'layer-deployment.zip')
                layer_asset = assets.Asset(self,
                                           'ChaliceManagedLayer',
                                           path=layer_filename)
                managed_layers[0]['Properties']['ContentUri'] = {
                    'Bucket': layer_asset.s3_bucket_name,
                    'Key': layer_asset.s3_object_key
                }
        with open(sam_template_with_assets_path, 'w') as f:
            f.write(json.dumps(sam_template, indent=2))
        return sam_template_with_assets_path
Example #2
    def _emr_spark_step_task(self):
        # Add an EMR Step to run our pyspark job; an asset with our application will be
        # created and referenced in the job definition
        root_path = Path(os.path.dirname(os.path.abspath(__file__)))
        pyspark_script = root_path.joinpath('pyspark', 'example.py').as_posix()
        pyspark_example_asset = s3_assets.Asset(self,
                                                "PythonScript",
                                                path=pyspark_script)

        sample_spark_step = sfn.Task(
            self,
            "RunSparkExample",
            task=sfnt.EmrAddStep(
                # the concrete ClusterId will be picked up from the current state of the state machine
                cluster_id=sfn.Data.string_at("$.ClusterId"),
                name="SparkExample",
                # `command-runner.jar` is a jar from AWS that can be used to execute generic commands (like `spark-submit`)
                # if you write your programs in Java/Scala you can directly insert your jar file here instead of the script location
                jar="command-runner.jar",
                args=[
                    "spark-submit",
                    "--deploy-mode",
                    "cluster",
                    "--master",
                    "yarn",
                    f"s3://{pyspark_example_asset.s3_bucket_name}/{pyspark_example_asset.s3_object_key}",
                ],
            ),
            result_path="DISCARD",
        )
        return sample_spark_step
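
A minimal follow-up sketch (construct names are illustrative; it reuses the `sfn` alias above and the state-machine wiring pattern shown in Example #18) of chaining the returned EMR step into a state machine:

        # chain the EMR step behind a no-op Pass state and expose it as a state machine
        emr_spark_step = self._emr_spark_step_task()
        definition = sfn.Pass(self, "Start").next(emr_spark_step)
        sfn.StateMachine(self, "EmrSparkExample", definition=definition)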
Example #3
    def upload_to_assets_bucket(self, construct_id, file_name):

        file_path = './assets'
        asset_bucket = s3assets.Asset(self,
                                      id=f'{construct_id}-AssetBucket',
                                      path=os.path.join(file_path, file_name))
        return asset_bucket
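
A minimal usage sketch (the role, construct id, and file name are illustrative) that grants the returned Asset to an EC2 role, mirroring the `grant_read` calls in the other examples:

        instance_role = iam.Role(self, 'InstanceRole',
                                 assumed_by=iam.ServicePrincipal('ec2.amazonaws.com'))
        config_asset = self.upload_to_assets_bucket('Demo', 'config.json')
        config_asset.grant_read(instance_role)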
Example #4
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        self._role = iam.Role(
            self,
            id='role',
            assumed_by=iam.ServicePrincipal('ec2.amazonaws.com'))

        self._files = s3.Asset(self, id='files', path='files')
        self._files.grant_read(self._role)
Example #5
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # The code that defines your stack goes here
        # bucket = s3.Bucket(self, "sitebucket", bucket_name="fadhil-getting-started-bucket", public_read_access=True, website_index_document="index.html")
        # core.CfnOutput(self, "sitebucketname", value=bucket.bucket_name)
        # core.CfnOutput(self, "siteBucketWebsite", value=bucket.bucket_website_url)

        asset = assets.Asset(self,
                             "SampleAsset",
                             path="./sample-asset/index.html")
Example #6
    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        cfn_role = iam.Role(
            self,
            'CloudFormationRole',
            assumed_by=iam.ServicePrincipal('cloudformation.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'AmazonEC2FullAccess'),
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'IAMFullAccess'),
            ])

        alice = iam.User(
            self,
            'Alice',
            user_name='alice',
            password=SecretValue.ssm_secure(
                parameter_name='/demo/permission-boundary/password',
                version='2',
            ),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'AmazonEC2ReadOnlyAccess'),
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'AWSCloudFormationFullAccess'),
            ],
        )
        alice.add_to_principal_policy(
            iam.PolicyStatement(
                actions=['iam:PassRole'],
                resources=[cfn_role.role_arn],
            ))
        alice.add_to_principal_policy(
            iam.PolicyStatement(
                actions=['iam:ListRoles'],
                resources=['*'],
            ))

        template = assets.Asset(
            self,
            'Template',
            path='./files/template.yml',
            readers=[alice, cfn_role],
        )

        CfnOutput(self, 'TemplateUrl', value=template.http_url)
Example #7
    def _update_sam_template(self):
        deployment_zip_path = os.path.join(self.sam_package_dir, 'deployment.zip')
        sam_deployment_asset = assets.Asset(
            self, 'ChaliceAppCode', path=deployment_zip_path)
        sam_template_path = os.path.join(self.sam_package_dir, 'sam.json')

        with open(sam_template_path) as sam_template_file:
            sam_template = json.load(sam_template_file)
            functions = filter(
                lambda resource: resource['Type'] == 'AWS::Serverless::Function',
                sam_template['Resources'].values()
            )
            for function in functions:
                function['Properties']['CodeUri'] = {
                    'Bucket': sam_deployment_asset.s3_bucket_name,
                    'Key': sam_deployment_asset.s3_object_key
                }

        return sam_template
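
A minimal sketch (assuming the same deprecated `core.CfnInclude` call used in Example #16) of feeding the rewritten template returned above back into the stack:

        sam_template = self._update_sam_template()
        core.CfnInclude(self, 'ChaliceApp', template=sam_template)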
Example #8
    def create_api_gateway(self) -> None:
        """
        Create API gateway and lambda integration
        """

        # api_stage = core.CfnParameter(self, id="ApiStage", type=str)
        openapi_asset = s3_assets.Asset(
            self,
            "openapi_asset",
            path="cbers2stac/openapi/core-item-search-query-integrated.yaml",
        )
        data = core.Fn.transform("AWS::Include",
                                 {"Location": openapi_asset.s3_object_url})
        definition = apigateway.AssetApiDefinition.from_inline(data)
        apigateway.SpecRestApi(
            self,
            id="stacapi",
            api_definition=definition,
            deploy_options=apigateway.StageOptions(
                logging_level=apigateway.MethodLoggingLevel.INFO),
        )
Example #9
    def __init__(
        self,
        scope: core.Construct,
        construct_id: str,
        **kwargs,
    ) -> None:
        super().__init__(scope, construct_id, **kwargs)

        execution_role_arn = core.CfnParameter(
            self,
            "ExecutionRoleArn",
            type="String",
            description="The SageMaker Studio execution role",
        )

        portfolio_name = core.CfnParameter(
            self,
            "PortfolioName",
            type="String",
            description="The name of the portfolio",
            default="SageMaker Organization Templates",
        )

        portfolio_owner = core.CfnParameter(
            self,
            "PortfolioOwner",
            type="String",
            description="The owner of the portfolio.",
            default="administrator",
        )

        product_version = core.CfnParameter(
            self,
            "ProductVersion",
            type="String",
            description="The product version to deploy",
            default="1.0",
        )

        portfolio = aws_servicecatalog.CfnPortfolio(
            self,
            "Portfolio",
            display_name=portfolio_name.value_as_string,
            description="Organization templates for AB Testing pipeline",
            provider_name=portfolio_owner.value_as_string,
        )

        asset = aws_s3_assets.Asset(
            self, "TemplateAsset", path="./ab-testing-pipeline.yml"
        )

        product = aws_servicecatalog.CfnCloudFormationProduct(
            self,
            "Product",
            name="A/B Testing Deployment Pipeline",
            description="Amazon SageMaker Project for A/B Testing models",
            owner=portfolio_owner.value_as_string,
            provisioning_artifact_parameters=[
                aws_servicecatalog.CfnCloudFormationProduct.ProvisioningArtifactPropertiesProperty(
                    name=product_version.value_as_string,
                    info={"LoadTemplateFromURL": asset.s3_url},
                ),
            ],
            tags=[
                core.CfnTag(key="sagemaker:studio-visibility", value="true"),
            ],
        )

        aws_servicecatalog.CfnPortfolioProductAssociation(
            self,
            "ProductAssoication",
            portfolio_id=portfolio.ref,
            product_id=product.ref,
        )

        launch_role = aws_iam.Role.from_role_arn(
            self,
            "LaunchRole",
            role_arn=f"arn:{self.partition}:iam::{self.account}:role/service-role/AmazonSageMakerServiceCatalogProductsLaunchRole",
        )

        aws_servicecatalog.CfnLaunchRoleConstraint(
            self,
            "LaunchRoleConstraint",
            portfolio_id=portfolio.ref,
            product_id=product.ref,
            role_arn=launch_role.role_arn,
            description=f"Launch as {launch_role.role_arn}",
        )

        aws_servicecatalog.CfnPortfolioPrincipalAssociation(
            self,
            "PortfolioPrincipalAssociation",
            portfolio_id=portfolio.ref,
            principal_arn=execution_role_arn.value_as_string,
            principal_type="IAM",
        )

        # Create the deployment asset as an output to pass to pipeline stack
        deployment_asset = aws_s3_assets.Asset(
            self, "DeploymentAsset", path="./deployment_pipeline"
        )

        deployment_asset.grant_read(grantee=launch_role)

        # Output the deployment bucket and key, for input into the pipeline stack
        core.CfnOutput(
            self,
            "CodeCommitSeedBucket",
            value=deployment_asset.s3_bucket_name,
        )
        core.CfnOutput(self, "CodeCommitSeedKey", value=deployment_asset.s3_object_key)
Example #10
    def __init__(self, scope: core.Construct, id: str, vpc: ec2.IVpc,
                 cluster: NeptuneCluster, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        role = iam.Role(
            self,
            'Ec2Role',
            assumed_by=iam.ServicePrincipal('ec2.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'AmazonSSMManagedInstanceCore'),
            ],
        )

        config_asset = s3_assets.Asset(
            self,
            'ConfigYaml',
            path='./files/neptune-remote.yaml',
            readers=[role],
        )

        sg = ec2.SecurityGroup(
            self,
            'SecurityGroup',
            vpc=vpc,
        )

        user_data = ec2.UserData.for_linux()
        user_data.add_commands(
            'yum update -y',
            'yum install -y java-1.8.0-devel',
            'cd ~',
            # Install the CA certificate
            'mkdir /tmp/certs/',
            'cp /etc/pki/java/cacerts /tmp/certs/cacerts',
            'wget https://www.amazontrust.com/repository/SFSRootCAG2.cer',
            'keytool -importcert -alias neptune-ca -keystore /tmp/certs/cacerts -file /root/SFSRootCAG2.cer -noprompt -storepass changeit',
            # Download Gremlin console
            'wget https://archive.apache.org/dist/tinkerpop/3.4.8/apache-tinkerpop-gremlin-console-3.4.8-bin.zip',
            'unzip apache-tinkerpop-gremlin-console-3.4.8-bin.zip',
            # Download default configuration and update endpoint url
            'cd apache-tinkerpop-gremlin-console-3.4.8',
            'aws s3 cp s3://{bucket}/{key} conf/neptune-remote.yaml'.format(
                bucket=config_asset.s3_bucket_name,
                key=config_asset.s3_object_key,
            ),
            'sed -i "s/ENDPOINT_URL/{endpoint_url}/g" conf/neptune-remote.yaml'
            .format(endpoint_url=cluster.endpoint, ),
        )

        ec2.Instance(
            self,
            'Instance',
            role=role,
            vpc=vpc,
            security_group=sg,
            user_data=user_data,
            user_data_causes_replacement=True,
            instance_type=ec2.InstanceType.of(
                instance_class=ec2.InstanceClass.BURSTABLE3_AMD,
                instance_size=ec2.InstanceSize.NANO,
            ),
            machine_image=ec2.AmazonLinuxImage(
                generation=ec2.AmazonLinuxGeneration.AMAZON_LINUX_2, ),
        )

        self.role = role
        self.security_group = sg
Example #11
    def define_userdata_asset(self, path, filename):
        full_path = os.path.join(path, filename)
        if os.path.isfile(full_path):
            return s3_assets.Asset(self, "UserDataAsset", path=full_path)
        else:
            print(f"Could not find {full_path}")
Example #12
    def __init__(self, app: core.App, id: str) -> None:
        super().__init__(app, id)

        ##################################
        # Lambda Timeouts (seconds) & Queue Redrive
        ##################################

        lambda_gatherer_timeout = 600
        lambda_joiner_timeout = 350
        # pa11y's timeout is set to 50, so the lambda is just a little longer
        lambda_a11y_scan_timeout = 55
        max_receive_count = 2

        ##################################
        # S3 Bucket with Domains
        ##################################

        asset = aws_s3_assets.Asset(
            self, 'domain-list', path=os.path.abspath('./domains/domains.csv'))

        ##################################
        # Domain Gatherer Lambda and Queue
        ##################################

        domain_queue = sqs.Queue(
            self,
            'domain-queue',
            visibility_timeout=core.Duration.seconds(
                (max_receive_count + 1) * lambda_gatherer_timeout),
            dead_letter_queue=sqs.DeadLetterQueue(
                max_receive_count=max_receive_count,
                queue=sqs.Queue(self,
                                'domain-queue-dlq',
                                retention_period=core.Duration.days(5))))

        lambda_gatherer = lambda_.Function(
            self,
            "domain-gatherer",
            code=lambda_.Code.from_asset('./lambdas/domain_gatherer'),
            handler="handler.main",
            timeout=core.Duration.seconds(lambda_gatherer_timeout),
            runtime=lambda_.Runtime.PYTHON_3_7,
            memory_size=150)

        lambda_gatherer.add_environment('SQS_URL', domain_queue.queue_url)
        lambda_gatherer.add_environment('BUCKET_NAME', asset.s3_bucket_name)
        lambda_gatherer.add_environment('OBJECT_KEY', asset.s3_object_key)

        lambda_gatherer_sqs_exec_policy = iam.PolicyStatement(
            effect=iam.Effect.ALLOW,
            actions=[
                'lambda:InvokeFunction', 'sqs:SendMessage',
                'sqs:DeleteMessage', 'sqs:SendMessageBatch',
                'sqs:SetQueueAttributes', 'sqs:GetQueueAttributes',
                'sqs:GetQueueUrl'
            ],
            resources=[domain_queue.queue_arn])
        lambda_gatherer.add_to_role_policy(lambda_gatherer_sqs_exec_policy)
        domain_queue.grant_send_messages(lambda_gatherer)

        # trigger for 1st and 15th of the month at 18:00 UTC (1pm EST)
        lambda_gatherer_rule = events.Rule(self,
                                           "Lambda Gatherer Rule",
                                           schedule=events.Schedule.cron(
                                               minute='0',
                                               hour='18',
                                               day="1,15",
                                               month='*',
                                               year='*'))
        lambda_gatherer_rule.add_target(
            targets.LambdaFunction(lambda_gatherer))
        asset.grant_read(lambda_gatherer)

        ##################################
        # A11y Scanner Lambda and S3
        ##################################

        layer = lambda_.LayerVersion(
            self,
            'chrome-aws-lambda',
            code=lambda_.Code.from_asset('./lambdas/chrome_aws_lambda.zip'),
            compatible_runtimes=[lambda_.Runtime.NODEJS_12_X],
            description='A layer of chrome-aws-lambda')

        lambda_a11y_scan = lambda_.Function(
            self,
            "a11y-scan",
            code=lambda_.Code.from_asset('./lambdas/a11y_scan'),
            handler="index.handler",
            timeout=core.Duration.seconds(lambda_a11y_scan_timeout),
            runtime=lambda_.Runtime.NODEJS_12_X,
            memory_size=1000,
            layers=[layer])

        lambda_a11y_scan.add_event_source(
            sources.SqsEventSource(domain_queue, batch_size=1))

        # create s3 bucket to put results
        results_bucket = s3.Bucket(self,
                                   'results-bucket',
                                   versioned=False,
                                   removal_policy=core.RemovalPolicy.DESTROY,
                                   block_public_access=s3.BlockPublicAccess(
                                       block_public_acls=True,
                                       ignore_public_acls=True,
                                       block_public_policy=True,
                                       restrict_public_buckets=True),
                                   lifecycle_rules=[
                                       s3.LifecycleRule(
                                           enabled=True,
                                           expiration=core.Duration.days(10))
                                   ])

        lambda_a11y_scan.add_environment('BUCKET_NAME',
                                         results_bucket.bucket_name)
        results_bucket.grant_put(lambda_a11y_scan)

        ##################################
        # Results Joiner Lambda
        ##################################

        # create s3 bucket to put site data
        data_bucket = s3.Bucket(self,
                                'data-bucket',
                                versioned=False,
                                removal_policy=core.RemovalPolicy.DESTROY,
                                block_public_access=s3.BlockPublicAccess(
                                    block_public_acls=True,
                                    ignore_public_acls=True,
                                    block_public_policy=True,
                                    restrict_public_buckets=True))

        lambda_joiner = lambda_.Function(
            self,
            "results-joiner",
            code=lambda_.Code.from_asset(
                './lambda-releases/results_joiner.zip'),
            handler="handler.main",
            timeout=core.Duration.seconds(lambda_joiner_timeout),
            runtime=lambda_.Runtime.PYTHON_3_7,
            memory_size=400)
        lambda_joiner.add_environment('DATA_BUCKET_NAME',
                                      data_bucket.bucket_name)
        lambda_joiner.add_environment('RESULTS_BUCKET_NAME',
                                      results_bucket.bucket_name)
        results_bucket.grant_read_write(lambda_joiner)
        data_bucket.grant_read_write(lambda_joiner)

        # trigger for 8th and 23rd of the month at 18:00 UTC (1pm EST)
        lambda_joiner_rule = events.Rule(self,
                                         "Lambda Joiner Rule",
                                         schedule=events.Schedule.cron(
                                             minute='0',
                                             hour='18',
                                             day="8,23",
                                             month='*',
                                             year='*'))
        lambda_joiner_rule.add_target(targets.LambdaFunction(lambda_joiner))
Example #13
    def __init__(self, scope: cdk.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        ### Parameters
        bootstrap_script_args = cdk.CfnParameter(self, 'BootstrapScriptArgs',
            type='String',
            default='',
            description='Space separated arguments passed to the bootstrap script.'
        )

        # create a VPC
        vpc = ec2.Vpc(self, 'VPC', cidr='10.0.0.0/16', max_azs=99)

        # create a private and public subnet per vpc
        selection = vpc.select_subnets(
            subnet_type=ec2.SubnetType.PRIVATE
        )

        # Output created subnets
        for i, public_subnet in enumerate(vpc.public_subnets):
            cdk.CfnOutput(self, 'PublicSubnet%i' % i,  value=public_subnet.subnet_id)

        for i, private_subnet in enumerate(vpc.private_subnets):
            cdk.CfnOutput(self, 'PrivateSubnet%i' % i,  value=private_subnet.subnet_id)

        cdk.CfnOutput(self, 'VPCId',  value=vpc.vpc_id)

        # Create a Bucket
        bucket = s3.Bucket(self, "DataRepository")
        quickstart_bucket = s3.Bucket.from_bucket_name(self, 'QuickStartBucket', 'aws-quickstart')

        # Upload Bootstrap Script to that bucket
        bootstrap_script = assets.Asset(self, 'BootstrapScript',
            path='scripts/bootstrap.sh'
        )

        # Upload parallel cluster post_install_script to that bucket
        pcluster_post_install_script = assets.Asset(self, 'PclusterPostInstallScript',
            path='scripts/post_install_script.sh'
        )

        # Setup CloudTrail
        cloudtrail.Trail(self, 'CloudTrail', bucket=bucket)

        # Create a Cloud9 instance
        # Cloud9 doesn't have the ability to provide userdata
        # Because of this we need to use SSM run command
        cloud9_instance = cloud9.Ec2Environment(self, 'Cloud9Env', vpc=vpc, instance_type=ec2.InstanceType(instance_type_identifier='c5.large'))
        cdk.CfnOutput(self, 'URL',  value=cloud9_instance.ide_url)


        # Create a keypair in lambda and store the private key in SecretsManager
        c9_createkeypair_role = iam.Role(self, 'Cloud9CreateKeypairRole', assumed_by=iam.ServicePrincipal('lambda.amazonaws.com'))
        c9_createkeypair_role.add_managed_policy(iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AWSLambdaBasicExecutionRole'))
        # Add IAM permissions to the lambda role
        c9_createkeypair_role.add_to_policy(iam.PolicyStatement(
            actions=[
                'ec2:CreateKeyPair',
                'ec2:DeleteKeyPair'
            ],
            resources=['*'],
        ))

        # Lambda for Cloud9 keypair
        c9_createkeypair_lambda = _lambda.Function(self, 'C9CreateKeyPairLambda',
            runtime=_lambda.Runtime.PYTHON_3_6,
            handler='lambda_function.handler',
            timeout=cdk.Duration.seconds(300),
            role=c9_createkeypair_role,
            code=_lambda.Code.asset('functions/source/c9keypair'),
        #    code=_lambda.Code.from_bucket(
        )

        c9_createkeypair_provider = cr.Provider(self, "C9CreateKeyPairProvider", on_event_handler=c9_createkeypair_lambda)

        c9_createkeypair_cr = cfn.CustomResource(self, "C9CreateKeyPair", provider=c9_createkeypair_provider,
            properties={
                'ServiceToken': c9_createkeypair_lambda.function_arn
            }
        )
        #c9_createkeypair_cr.node.add_dependency(instance_id)
        c9_ssh_private_key_secret = secretsmanager.CfnSecret(self, 'SshPrivateKeySecret',
             secret_string=c9_createkeypair_cr.get_att_string('PrivateKey')
        )

        # The iam policy has a <REGION> parameter that needs to be replaced.
        # We do it programmatically so future versions of the synth'd stack
        # template include all regions.
        with open('iam/ParallelClusterUserPolicy.json') as json_file:
            data = json.load(json_file)
            for s in data['Statement']:
                if s['Sid'] == 'S3ParallelClusterReadOnly':
                    s['Resource'] = []
                    for r in region_info.RegionInfo.regions:
                        s['Resource'].append('arn:aws:s3:::{0}-aws-parallelcluster*'.format(r.name))

            parallelcluster_user_policy = iam.CfnManagedPolicy(self, 'ParallelClusterUserPolicy', policy_document=iam.PolicyDocument.from_json(data))

        # Cloud9 IAM Role
        cloud9_role = iam.Role(self, 'Cloud9Role', assumed_by=iam.ServicePrincipal('ec2.amazonaws.com'))
        cloud9_role.add_managed_policy(iam.ManagedPolicy.from_aws_managed_policy_name('AmazonSSMManagedInstanceCore'))
        cloud9_role.add_managed_policy(iam.ManagedPolicy.from_aws_managed_policy_name('AWSCloud9User'))
        cloud9_role.add_managed_policy(iam.ManagedPolicy.from_managed_policy_arn(self, 'AttachParallelClusterUserPolicy', parallelcluster_user_policy.ref))
        cloud9_role.add_to_policy(iam.PolicyStatement(
            resources=['*'],
            actions=[
                'ec2:DescribeInstances',
                'ec2:DescribeVolumes',
                'ec2:ModifyVolume'
            ]
        ))
        cloud9_role.add_to_policy(iam.PolicyStatement(
            resources=[c9_ssh_private_key_secret.ref],
            actions=[
                'secretsmanager:GetSecretValue'
            ]
        ))

        bootstrap_script.grant_read(cloud9_role)
        pcluster_post_install_script.grant_read(cloud9_role)

        # Cloud9 User
        # user = iam.User(self, 'Cloud9User', password=cdk.SecretValue.plain_text('supersecretpassword'), password_reset_required=True)

        # Cloud9 Setup IAM Role
        cloud9_setup_role = iam.Role(self, 'Cloud9SetupRole', assumed_by=iam.ServicePrincipal('lambda.amazonaws.com'))
        cloud9_setup_role.add_managed_policy(iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AWSLambdaBasicExecutionRole'))
        # Allow pcluster to be run in bootstrap
        cloud9_setup_role.add_managed_policy(iam.ManagedPolicy.from_managed_policy_arn(self, 'AttachParallelClusterUserPolicySetup', parallelcluster_user_policy.ref))

        # Add IAM permissions to the lambda role
        cloud9_setup_role.add_to_policy(iam.PolicyStatement(
            actions=[
                'cloudformation:DescribeStackResources',
                'ec2:AssociateIamInstanceProfile',
                'ec2:AuthorizeSecurityGroupIngress',
                'ec2:DescribeInstances',
                'ec2:DescribeInstanceStatus',
                'ec2:DescribeInstanceAttribute',
                'ec2:DescribeIamInstanceProfileAssociations',
                'ec2:DescribeVolumes',
                'ec2:DescribeVolumeAttribute',
                'ec2:DescribeVolumesModifications',
                'ec2:DescribeVolumeStatus',
                'ssm:DescribeInstanceInformation',
                'ec2:ModifyVolume',
                'ec2:ReplaceIamInstanceProfileAssociation',
                'ec2:ReportInstanceStatus',
                'ssm:SendCommand',
                'ssm:GetCommandInvocation',
                's3:GetObject',
                'lambda:AddPermission',
                'lambda:RemovePermission',
                'events:PutRule',
                'events:DeleteRule',
                'events:PutTargets',
                'events:RemoveTargets',
            ],
            resources=['*'],
        ))

        cloud9_setup_role.add_to_policy(iam.PolicyStatement(
            actions=['iam:PassRole'],
            resources=[cloud9_role.role_arn]
        ))

        cloud9_setup_role.add_to_policy(iam.PolicyStatement(
            actions=[
                'lambda:AddPermission',
                'lambda:RemovePermission'
            ],
            resources=['*']
        ))

        # Cloud9 Instance Profile
        c9_instance_profile = iam.CfnInstanceProfile(self, "Cloud9InstanceProfile", roles=[cloud9_role.role_name])

        # Lambda to add Instance Profile to Cloud9
        c9_instance_profile_lambda = _lambda.Function(self, 'C9InstanceProfileLambda',
            runtime=_lambda.Runtime.PYTHON_3_6,
            handler='lambda_function.handler',
            timeout=cdk.Duration.seconds(900),
            role=cloud9_setup_role,
            code=_lambda.Code.asset('functions/source/c9InstanceProfile'),
        )

        c9_instance_profile_provider = cr.Provider(self, "C9InstanceProfileProvider",
            on_event_handler=c9_instance_profile_lambda,
        )

        instance_id = cfn.CustomResource(self, "C9InstanceProfile", provider=c9_instance_profile_provider,
            properties={
                'InstanceProfile': c9_instance_profile.ref,
                'Cloud9Environment': cloud9_instance.environment_id,
            }
        )
        instance_id.node.add_dependency(cloud9_instance)

        # Lambda for Cloud9 Bootstrap
        c9_bootstrap_lambda = _lambda.Function(self, 'C9BootstrapLambda',
            runtime=_lambda.Runtime.PYTHON_3_6,
            handler='lambda_function.handler',
            timeout=cdk.Duration.seconds(900),
            role=cloud9_setup_role,
            code=_lambda.Code.asset('functions/source/c9bootstrap'),
        )

        c9_bootstrap_provider = cr.Provider(self, "C9BootstrapProvider", on_event_handler=c9_bootstrap_lambda)

        c9_bootstrap_cr = cfn.CustomResource(self, "C9Bootstrap", provider=c9_bootstrap_provider,
            properties={
                'Cloud9Environment': cloud9_instance.environment_id,
                'BootstrapPath': 's3://%s/%s' % (bootstrap_script.s3_bucket_name, bootstrap_script.s3_object_key),
                'BootstrapArguments': bootstrap_script_args.value_as_string,
                'VPCID': vpc.vpc_id,
                'MasterSubnetID': vpc.public_subnets[0].subnet_id,
                'ComputeSubnetID': vpc.private_subnets[0].subnet_id,
                'PostInstallScriptS3Url':  "".join( ['s3://', pcluster_post_install_script.s3_bucket_name,  "/", pcluster_post_install_script.s3_object_key ] ),
                'PostInstallScriptBucket': pcluster_post_install_script.s3_bucket_name,
                'KeyPairId':  c9_createkeypair_cr.ref,
                'KeyPairSecretArn': c9_ssh_private_key_secret.ref
            }
        )
        c9_bootstrap_cr.node.add_dependency(instance_id)
        c9_bootstrap_cr.node.add_dependency(c9_createkeypair_cr)
        c9_bootstrap_cr.node.add_dependency(c9_ssh_private_key_secret)
Example #14
    def __init__(
        self, scope: cdk.Construct, construct_id: str, domain: SMSDomainStack, **kwargs
    ) -> None:
        super().__init__(scope, construct_id, **kwargs)

        # Create the Lambda Stack for pre-populating the user home directory
        studio_user_lambda = StudioUserLambda(
            self, "FnPopulateStudioUser", vpc=domain.vpc, domain=domain.domain
        )

        # Generate the CF template for the studio user
        stage = cdk.Stage(self, "IntermediateStage")
        SMSIAMUserStack(
            stage,
            "StudioUserStack",
            synthesizer=cdk.BootstraplessSynthesizer(),
        )
        assembly = stage.synth(force=True)

        # Retrieve the local path of the CF template
        template_full_path = assembly.stacks[0].template_full_path

        # Upload CF template to s3 to create an asset to reference
        s3_asset = s3assets.Asset(
            self,
            "TemplateAsset",
            path=template_full_path,
        )

        # Create the Service Catalog product referencing the CF template
        sc_product = servicecatalog.CfnCloudFormationProduct(
            self,
            "StudioUser",
            owner="SageMakerStudio",
            provisioning_artifact_parameters=[
                servicecatalog.CfnCloudFormationProduct.ProvisioningArtifactPropertiesProperty(
                    info={"LoadTemplateFromURL": s3_asset.s3_url}
                )
            ],
            name="StudioUser",
        )

        # Create the Product Portfolio
        sc_portfolio = servicecatalog.CfnPortfolio(
            self,
            "SageMakerPortfolio",
            display_name="SageMakerPortfolio",
            provider_name="SageMakerTemplate",
        )

        # Associate the Studio User Template to the Portfolio
        servicecatalog.CfnPortfolioProductAssociation(
            self,
            "ProductAssociation",
            portfolio_id=sc_portfolio.ref,
            product_id=sc_product.ref,
        )

        # create a role and associate it with the portfolio
        sc_role = iam.Role(
            self,
            "StudioAdminRole",
            assumed_by=iam.AnyPrincipal(),
            role_name="SageMakerStudioAdminRole",
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    "AWSServiceCatalogEndUserFullAccess"
                ),
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    "AmazonSageMakerFullAccess"
                ),
            ],
        )
        sc_role.add_to_policy(
            iam.PolicyStatement(
                effect=iam.Effect.ALLOW,
                actions=[
                    "sagemaker:CreateUserProfile",
                ],
                resources=["*"],
            )
        )
        sc_role.add_to_policy(
            iam.PolicyStatement(
                effect=iam.Effect.ALLOW,
                actions=[
                    "lambda:InvokeFunction",
                ],
                resources=[studio_user_lambda.provider.service_token],
            )
        )
        sc_role.add_to_policy(
            iam.PolicyStatement(
                effect=iam.Effect.ALLOW,
                actions=[
                    "s3:GetObject",
                    "s3:ListBucket",
                ],
                resources=["*"],
            )
        )

        cdk.CfnOutput(
            self,
            "SageMakerStudioAdminRole",
            value=sc_role.role_arn,
            description="SageMakerStudioAdminRole",
            # export_name="SageMakerStudioAdminRole",
        )

        servicecatalog.CfnPortfolioPrincipalAssociation(
            self,
            "PortfolioPrincipalAssociacion",
            portfolio_id=sc_portfolio.ref,
            principal_arn=sc_role.role_arn,
            principal_type="IAM",
        )
Example #15
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)
        stack = core.Stack.of(self)

        vpc = ec2.Vpc(
            self,
            'Vpc',
            cidr='10.0.0.0/24',
            max_azs=2,  # Need at least 2 AZs for Neptune
            nat_gateways=1,  # Saving on cost by only using 1 NAT
        )

        # Custom Neptune construct
        cluster = NeptuneCluster(
            self,
            'Cluster',
            vpc=vpc,
            db_instance_class='db.r5.large',
        )

        # An EC2 instance to run commands from
        instance = Instance(
            self,
            'Instance',
            vpc=vpc,
            cluster=cluster,
        )

        # Allow EC2 instance connect to Neptune cluster
        cluster.security_group.add_ingress_rule(instance.security_group,
                                                ec2.Port.tcp(8182))

        # Demo files
        vertices_asset = s3_assets.Asset(
            self,
            'VerticesCsv',
            path='./files/vertices.csv',
            readers=[cluster.role],
        )
        edges_asset = s3_assets.Asset(
            self,
            'EdgesCsv',
            path='./files/edges.csv',
            readers=[cluster.role],
        )

        core.CfnOutput(
            self,
            'Command1LoadVertices',
            value='curl -X POST -H \'{headers}\' {url} -d \'{request_body}\''.
            format(
                headers='Content-Type: application/json',
                url='https://{endpoint}:8182/loader'.format(
                    endpoint=cluster.endpoint),
                request_body=json_encode({
                    'failOnError': 'FALSE',
                    'format': 'csv',
                    'region': stack.region,
                    'iamRoleArn': cluster.role.role_arn,
                    'source': 's3://{bucket}/{key}'.format(
                        bucket=vertices_asset.s3_bucket_name,
                        key=vertices_asset.s3_object_key,
                    ),
                }),
            ))

        core.CfnOutput(
            self,
            'Command2LoadEdges',
            value='curl -X POST -H \'{headers}\' {url} -d \'{request_body}\''.
            format(
                headers='Content-Type: application/json',
                url='https://{endpoint}:8182/loader'.format(
                    endpoint=cluster.endpoint),
                request_body=json_encode({
                    'failOnError': 'FALSE',
                    'format': 'csv',
                    'region': stack.region,
                    'iamRoleArn': cluster.role.role_arn,
                    'source': 's3://{bucket}/{key}'.format(
                        bucket=edges_asset.s3_bucket_name,
                        key=edges_asset.s3_object_key,
                    ),
                }),
            ))

        core.CfnOutput(
            self,
            'Command3ListAllVertices',
            value=':remote connect tinkerpop.server conf/neptune-remote.yaml',
        )

        core.CfnOutput(
            self,
            'Command4ListAllVertices',
            value=':remote console',
        )

        core.CfnOutput(
            self,
            'Command5ListAllGamers',
            value='g.V().hasLabel("person")',
        )

        core.CfnOutput(
            self,
            'Command6ListAllGamers',
            value='g.V().hasLabel("game").groupCount().by("GameGenre")',
        )

        core.CfnOutput(
            self,
            'Command7ListAllGamers',
            value=
            'g.V().has("GamerAlias","groundWalker").as("TargetGamer").out("likes").aggregate("self").in("likes").where(neq("TargetGamer")).out("likes").where(without("self")).dedup().values("GameTitle")',
        )
Example #16
    def __init__(
        self,
        scope: core.Construct,
        id: str,
        source_directory: Union[Path, str],
        stage_config: Optional[dict] = None,
        lambda_configs: Optional[dict] = None,
        environment: Optional[dict] = None,
        **kwargs,
    ):
        """
        Args:
            scope: cdk stack or construct
            id: identifier
            source_directory: the output directory of `chalice package` or the base path of the
                              chalice codebase
            environment: environment variables to apply across lambdas
            stage_config: stage-level configuration options (e.g. `api_gateway_endpoint_type`)
                          that overwrite the `dev` stage config
            lambda_configs: lambda-level configurations, passed to `lambda_functions`
                            in the `dev` stage config
            **kwargs:
        """

        super().__init__(scope, id, **kwargs)

        stage_config = stage_config if stage_config is not None else {}

        lambda_configs = lambda_configs if lambda_configs is not None else {}

        environment = environment if environment is not None else {}

        source_path = Path(source_directory)

        if Path(source_path, "app.py").exists():

            logging.debug("assuming app has not been packaged")

            config_path = Path(source_path, ".chalice", "config.json")

            original_config_text = config_path.read_text()

            config_data = json.loads(original_config_text)

            config_data["stages"]["dev"].update(stage_config)

            if lambda_configs:

                config_data["stages"]["dev"]["lambda_functions"] = {
                    **config_data["stages"]["dev"].get("lambda_function", {}),
                    **lambda_configs,
                }

            updated_config = json.dumps(config_data, indent=2)

            logging.debug(updated_config)

            config_path.write_text(updated_config)

            output_dir = "chalice.out"

            sp.run(f"chalice package {output_dir}", shell=True, check=True)

            config_path.write_text(original_config_text)

            package_path = Path(output_dir)

        else:

            package_path = Path(source_directory)

        sam_path = Path(package_path, "sam.json")

        text = sam_path.read_text()

        self.template = json.loads(text)

        zip_path = Path(package_path, "deployment.zip")

        s3_asset = aws_s3_assets.Asset(self,
                                       "chalice-app-s3-object",
                                       path=zip_path.__fspath__())

        for resource_name, resource in self.template["Resources"].items():

            if resource["Type"] == "AWS::Serverless::Function":

                properties = resource["Properties"]

                properties["CodeUri"] = {
                    "Bucket": s3_asset.s3_bucket_name,
                    "Key": s3_asset.s3_object_key,
                }

                properties.setdefault("Environment",
                                      {}).setdefault("Variables",
                                                     {}).update(environment)

        core.CfnInclude(self, "chalice-app", template=self.template)
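
A minimal instantiation sketch (the wrapping class name `ChaliceApp` and the source directory are hypothetical; the keyword arguments come from the signature documented above):

        ChaliceApp(
            self,
            'chalice-app',
            source_directory='./runtime',
            environment={'LOG_LEVEL': 'INFO'},
        )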
Example #17
    def __init__(
        self, scope: core.Construct, id: str, vpc_stack, kafka_stack, **kwargs
    ) -> None:
        super().__init__(scope, id, **kwargs)

        # log generator asset
        log_generator_py = assets.Asset(
            self, "log_generator", path=os.path.join(dirname, "log_generator.py")
        )
        # log generator requirements.txt asset
        log_generator_requirements_txt = assets.Asset(
            self,
            "log_generator_requirements_txt",
            path=os.path.join(dirname, "log_generator_requirements.txt"),
        )

        # get kafka brokers
        kafka_brokers = f'''"{kafka_get_brokers().replace(",", '", "')}"'''

        # update filebeat.yml to .asset
        filebeat_yml_asset = file_updated(
            os.path.join(dirname, "filebeat.yml"), {"$kafka_brokers": kafka_brokers},
        )
        filebeat_yml = assets.Asset(self, "filebeat_yml", path=filebeat_yml_asset)
        elastic_repo = assets.Asset(
            self, "elastic_repo", path=os.path.join(dirname, "elastic.repo")
        )
        # userdata for Filebeat
        fb_userdata = user_data_init(log_group_name="elkk/filebeat/instance")
        # instance for Filebeat
        fb_instance = ec2.Instance(
            self,
            "filebeat_client",
            instance_type=ec2.InstanceType(constants["FILEBEAT_INSTANCE"]),
            machine_image=ec2.AmazonLinuxImage(
                generation=ec2.AmazonLinuxGeneration.AMAZON_LINUX_2
            ),
            vpc=vpc_stack.get_vpc,
            vpc_subnets={"subnet_type": ec2.SubnetType.PUBLIC},
            key_name=constants["KEY_PAIR"],
            security_group=kafka_stack.get_kafka_client_security_group,
            user_data=fb_userdata,
        )
        core.Tag.add(fb_instance, "project", constants["PROJECT_TAG"])

        # create policies for EC2 to connect to kafka
        access_kafka_policy = iam.PolicyStatement(
            effect=iam.Effect.ALLOW,
            actions=["kafka:ListClusters", "kafka:GetBootstrapBrokers",],
            resources=["*"],
        )
        # add the role permissions
        fb_instance.add_to_role_policy(statement=access_kafka_policy)
        # add log permissions
        instance_add_log_permissions(fb_instance)
        # add access to the file asset
        filebeat_yml.grant_read(fb_instance)
        elastic_repo.grant_read(fb_instance)
        log_generator_py.grant_read(fb_instance)
        log_generator_requirements_txt.grant_read(fb_instance)
        # add commands to the userdata
        fb_userdata.add_commands(
            # get setup assets files
            f"aws s3 cp s3://{filebeat_yml.s3_bucket_name}/{filebeat_yml.s3_object_key} /home/ec2-user/filebeat.yml",
            f"aws s3 cp s3://{elastic_repo.s3_bucket_name}/{elastic_repo.s3_object_key} /home/ec2-user/elastic.repo",
            f"aws s3 cp s3://{log_generator_py.s3_bucket_name}/{log_generator_py.s3_object_key} /home/ec2-user/log_generator.py",
            f"aws s3 cp s3://{log_generator_requirements_txt.s3_bucket_name}/{log_generator_requirements_txt.s3_object_key} /home/ec2-user/requirements.txt",
            # get python3
            "yum install python3 -y",
            # get pip
            "yum install python-pip -y",
            # make log generator executable
            "chmod +x /home/ec2-user/log_generator.py",
            # get log generator requirements
            "python3 -m pip install -r /home/ec2-user/requirements.txt",
            # Filebeat
            "rpm --import https://packages.elastic.co/GPG-KEY-elasticsearch",
            # move Filebeat repo file
            "mv -f /home/ec2-user/elastic.repo /etc/yum.repos.d/elastic.repo",
            # install Filebeat
            "yum install filebeat -y",
            # move filebeat.yml to final location
            "mv -f /home/ec2-user/filebeat.yml /etc/filebeat/filebeat.yml",
            # update log generator ownership
            "chown -R ec2-user:ec2-user /home/ec2-user",
            # start Filebeat
            "systemctl start filebeat",
        )
        # add the signal
        fb_userdata.add_signal_on_exit_command(resource=fb_instance)
        # attach the userdata
        fb_instance.add_user_data(fb_userdata.render())
        # add creation policy for instance
        fb_instance.instance.cfn_options.creation_policy = core.CfnCreationPolicy(
            resource_signal=core.CfnResourceSignal(count=1, timeout="PT10M")
        )
Example #18
    def __init__(self, app: core.App, cfn_name: str, stack_env):
        super().__init__(scope=app, id=f"{cfn_name}-{stack_env}")

        glue_code = s3_assets.Asset(
            scope=self,
            id=f"{cfn_name}-glue-script",
            path="./glue_script/glue_job_script.py",
        )

        glue_s3_access_role = iam.Role(
            scope=self,
            id=f"glue_s3_access_role_{stack_env}",
            role_name=f"glue_s3_access_role_{stack_env}",
            assumed_by=iam.ServicePrincipal("glue.amazonaws.com"))

        # add policy to access S3
        glue_s3_access_role.add_to_policy(
            iam.PolicyStatement(effect=iam.Effect.ALLOW,
                                resources=["*"],
                                actions=["s3:*"]))

        # add policy to access CloudWatch Logs
        glue_s3_access_role.add_to_policy(
            iam.PolicyStatement(effect=iam.Effect.ALLOW,
                                resources=["arn:aws:logs:*:*:*"],
                                actions=[
                                    "logs:CreateLogGroup",
                                    "logs:CreateLogStream",
                                    "logs:PutLogEvents",
                                    "logs:DescribeLogStreams"
                                ]))

        # glue
        # specify the job name explicitly, because the auto-generated name cannot be referenced after deployment
        glue_job_name = f"{cfn_name}-glue-job"
        _ = glue.CfnJob(
            scope=self,
            id=glue_job_name,
            name=glue_job_name,
            command=glue.CfnJob.JobCommandProperty(
                # glueetl or pythonshell
                name=self.GLUE_JOB_COMMAND_GLUE_ETL,
                script_location=
                f"s3://{glue_code.s3_bucket_name}/{glue_code.s3_object_key}"),
            # set role-name!
            role=glue_s3_access_role.role_name,
            glue_version=self.GLUE_VERSION_2_0,
            number_of_workers=2,
            worker_type=self.GLUE_WORKER_TYPE_STANDARD,
            timeout=1800)

        # StepFunction Tasks
        sfn_task_pass = sfn.Pass(scope=self,
                                 id=f"{cfn_name}-sfn-pass",
                                 comment="pass example",
                                 input_path="$",
                                 result_path="$.source",
                                 result=sfn.Result.from_string("example"),
                                 output_path="$")

        # wait until the JOB completed: sfn.IntegrationPattern.RUN_JOB
        # process next step without waiting: sfn.IntegrationPattern.REQUEST_RESPONSE
        sfn_task_glue_job = sfn_tasks.GlueStartJobRun(
            scope=self,
            id=f"{cfn_name}-sfn-lambda-task",
            glue_job_name=glue_job_name,
            integration_pattern=sfn.IntegrationPattern.RUN_JOB,
            input_path="$",
            result_path="$.result",
            output_path="$.output")

        # stepfunctions
        definition = sfn_task_pass.next(sfn_task_glue_job)

        _ = sfn.StateMachine(scope=self,
                             id=f"{cfn_name}-SFn-{stack_env}",
                             definition=definition)
Example #19
    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        EC2_KEY_PAIR_NAME = cdk.CfnParameter(
            self,
            'EC2KeyPairName',
            type='String',
            description='Amazon EC2 Instance KeyPair name')

        vpc_name = self.node.try_get_context("vpc_name")
        vpc = aws_ec2.Vpc.from_lookup(self,
                                      "ExistingVPC",
                                      is_default=True,
                                      vpc_name=vpc_name)

        # vpc = aws_ec2.Vpc(self, "JenkinsOnEC2Stack",
        #   max_azs=2,
        #   gateway_endpoints={
        #     "S3": aws_ec2.GatewayVpcEndpointOptions(
        #       service=aws_ec2.GatewayVpcEndpointAwsService.S3
        #     )
        #   }
        # )

        #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceClass.html
        #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceSize.html#aws_cdk.aws_ec2.InstanceSize
        ec2_instance_type = aws_ec2.InstanceType.of(
            aws_ec2.InstanceClass.BURSTABLE3, aws_ec2.InstanceSize.MEDIUM)

        sg_jenkins_host = aws_ec2.SecurityGroup(
            self,
            "JenkinsHostSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for a jenkins host',
            security_group_name='jenkins-host-sg')
        cdk.Tags.of(sg_jenkins_host).add('Name', 'jenkins-host-sg')

        #TODO: SHOULD restrict IP range allowed to ssh access
        sg_jenkins_host.add_ingress_rule(peer=aws_ec2.Peer.ipv4("0.0.0.0/0"),
                                         connection=aws_ec2.Port.tcp(22),
                                         description='SSH access')
        sg_jenkins_host.add_ingress_rule(peer=aws_ec2.Peer.ipv4("0.0.0.0/0"),
                                         connection=aws_ec2.Port.tcp(80),
                                         description='HTTP access')

        jenkins_host = aws_ec2.Instance(
            self,
            "JenkinsHost",
            vpc=vpc,
            instance_type=ec2_instance_type,
            machine_image=aws_ec2.MachineImage.latest_amazon_linux(
                generation=aws_ec2.AmazonLinuxGeneration.AMAZON_LINUX_2,
                edition=aws_ec2.AmazonLinuxEdition.STANDARD,
                kernel=aws_ec2.AmazonLinuxKernel.KERNEL5_X),
            vpc_subnets=aws_ec2.SubnetSelection(
                subnet_type=aws_ec2.SubnetType.PUBLIC),
            security_group=sg_jenkins_host,
            key_name=EC2_KEY_PAIR_NAME.value_as_string)

        # Script in S3 as Asset
        user_data_asset = aws_s3_assets.Asset(
            self,
            "JenkinsEC2UserData",
            path=os.path.join(os.path.dirname(__file__),
                              "user-data/install_jenkins.sh"))

        local_path = jenkins_host.user_data.add_s3_download_command(
            bucket=user_data_asset.bucket,
            bucket_key=user_data_asset.s3_object_key)

        # Userdata executes script from S3
        jenkins_host.user_data.add_execute_file_command(file_path=local_path)
        user_data_asset.grant_read(jenkins_host.role)

        cdk.CfnOutput(self,
                      'JenkinsHostId',
                      value=jenkins_host.instance_id,
                      export_name='JenkinsHostId')
        cdk.CfnOutput(self,
                      'JenkinsHostPublicDNSName',
                      value=jenkins_host.instance_public_dns_name,
                      export_name='JenkinsHostPublicDNSName')
Example #20
    def __init__(
        self,
        scope: cdk.Construct,
        construct_id: str,
        stack_log_level: str,
        glue_db_name: str,
        glue_table_name: str,
        etl_bkt,
        src_stream,
        **kwargs,
    ) -> None:
        super().__init__(scope, construct_id, **kwargs)

        self.template_options.metadata = {"License": "Miztiik Corp."}

        # Glue Job IAM Role
        self._glue_etl_role = _iam.Role(
            self,
            "glueJobRole",
            assumed_by=_iam.ServicePrincipal("glue.amazonaws.com"),
            managed_policies=[
                _iam.ManagedPolicy.from_aws_managed_policy_name(
                    "AmazonS3ReadOnlyAccess"),
                _iam.ManagedPolicy.from_aws_managed_policy_name(
                    "service-role/AWSGlueServiceRole")
            ])
        self._glue_etl_role.add_to_policy(
            _iam.PolicyStatement(
                actions=["s3:*"],
                resources=[f"{etl_bkt.bucket_arn}",
                           f"{etl_bkt.bucket_arn}/*"]))

        self._glue_etl_role.add_to_policy(
            _iam.PolicyStatement(actions=["kinesis:DescribeStream"],
                                 resources=[f"{src_stream.stream_arn}"]))

        src_stream.grant_read(self._glue_etl_role)

        # Create the Glue job to convert incoming JSON to parquet
        # Read GlueSpark Code
        try:
            with open(
                    "stacks/back_end/glue_stacks/glue_job_scripts/kinesis_streams_batch_to_s3_etl.py",
                    encoding="utf-8",
                    mode="r",
            ) as f:
                kinesis_streams_batch_to_s3_etl = f.read()
        except OSError:
            print("Unable to read Glue Job Code")
            raise

        etl_script_asset = _s3_assets.Asset(
            self,
            "etlScriptAsset",
            path=
            "stacks/back_end/glue_stacks/glue_job_scripts/kinesis_streams_batch_to_s3_etl.py"
        )

        self.etl_prefix = "stream-etl"
        _glue_etl_job = _glue.CfnJob(
            self,
            "glueJsonToParquetJob",
            name="stream-etl-processor",
            description=
            "Glue Job to process stream of events from Kinesis data stream and store them in parquet format in S3",
            role=self._glue_etl_role.role_arn,
            glue_version="2.0",
            command=_glue.CfnJob.JobCommandProperty(
                name="gluestreaming",
                script_location=
                f"s3://{etl_script_asset.s3_bucket_name}/{etl_script_asset.s3_object_key}",
                python_version="3"),
            default_arguments={
                "--src_db_name": glue_db_name,
                "--src_tbl_name": glue_table_name,
                "--datalake_bkt_name": etl_bkt.bucket_name,
                "--datalake_bkt_prefix": f"{self.etl_prefix}/",
                "--job-bookmark-option": "job-bookmark-enable"
            },
            allocated_capacity=1,
            # timeout=2,
            max_retries=2,
            execution_property=_glue.CfnJob.ExecutionPropertyProperty(
                max_concurrent_runs=1))

        # Configure a Trigger - Every hour
        _glue_etl_job_trigger = _glue.CfnTrigger(
            self,
            "glueEtlJobtrigger",
            type="SCHEDULED",
            description=
            "Miztiik Automation: Trigger streaming etl glue job every hour",
            schedule="cron(0 1 * * ? *)",
            start_on_creation=False,
            actions=[
                _glue.CfnTrigger.ActionProperty(
                    job_name=f"{_glue_etl_job.name}", timeout=2)
            ])
        _glue_etl_job_trigger.add_depends_on(_glue_etl_job)

        ###########################################
        ################# OUTPUTS #################
        ###########################################
        output_0 = cdk.CfnOutput(
            self,
            "AutomationFrom",
            value=f"{GlobalArgs.SOURCE_INFO}",
            description=
            "To know more about this automation stack, check out our github page.",
        )

        output_1 = cdk.CfnOutput(
            self,
            "StreamingETLGlueJob",
            value=
            f"https://console.aws.amazon.com/gluestudio/home?region={cdk.Aws.REGION}#/jobs",
            description="Glue ETL Job.",
        )
Exemple #21
0
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        domain_name = self.node.try_get_context('domain_name')

        subdomain = 'enclave.{}'.format(domain_name)

        zone = route53.HostedZone.from_lookup(
            self,
            'Zone',
            domain_name=domain_name,
        )

        certificate = acm.DnsValidatedCertificate(
            self,
            'Certificate',
            domain_name=subdomain,
            hosted_zone=zone,
        )

        vpc = ec2.Vpc(
            self,
            'Vpc',
            cidr='10.11.12.0/24',
            max_azs=2,
            # Only need public IPs, so no need for private subnets
            subnet_configuration=[
                ec2.SubnetConfiguration(name='public',
                                        subnet_type=ec2.SubnetType.PUBLIC)
            ])

        role = iam.Role(
            self,
            'Ec2SsmRole',
            assumed_by=iam.ServicePrincipal('ec2.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'AmazonSSMManagedInstanceCore')
            ],
        )

        role.add_to_policy(
            iam.PolicyStatement(
                actions=['ec2:AssociateEnclaveCertificateIamRole'],
                resources=[
                    certificate.certificate_arn,
                    role.role_arn,
                ],
            ))

        role.add_to_policy(
            iam.PolicyStatement(
                actions=['s3:GetObject'],
                resources=['arn:aws:s3:::aws-ec2-enclave-certificate-*/*'],
            ))

        role.add_to_policy(
            iam.PolicyStatement(
                actions=['kms:Decrypt'],
                resources=['arn:aws:kms:*:*:key/*'],
            ))

        role.add_to_policy(
            iam.PolicyStatement(
                actions=['iam:GetRole'],
                resources=[role.role_arn],
            ))

        nginx_config = s3_assets.Asset(
            self,
            'NginxConfig',
            path='./files/nginx.conf',
            readers=[role],
        )

        enclave_config = s3_assets.Asset(
            self,
            'EnclaveConfig',
            path='./files/acm.yaml',
            readers=[role],
        )

        # Source: https://docs.aws.amazon.com/enclaves/latest/user/nitro-enclave-refapp.html
        user_data = ec2.UserData.for_linux()
        user_data.add_commands(
            'curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"',
            'unzip awscliv2.zip',
            './aws/install',
            '/usr/local/bin/aws ec2 associate-enclave-certificate-iam-role --certificate-arn {certificate_arn} --role-arn {role_arn} --region {region}'
            .format(
                certificate_arn=certificate.certificate_arn,
                role_arn=role.role_arn,
                region=self.region,
            ),
            'aws s3 cp s3://{bucket}/{key} /etc/nginx/nginx.conf'.format(
                bucket=nginx_config.s3_bucket_name,
                key=nginx_config.s3_object_key,
            ),
            'sed -i "s+DOMAIN_NAME+{domain_name}+g" /etc/nginx/nginx.conf'.
            format(domain_name=subdomain, ),
            'aws s3 cp s3://{bucket}/{key} /etc/nitro_enclaves/acm.yaml'.
            format(
                bucket=enclave_config.s3_bucket_name,
                key=enclave_config.s3_object_key,
            ),
            'sed -i "s+CERTIFICATE_ARN+{certificate_arn}+g" /etc/nitro_enclaves/acm.yaml'
            .format(certificate_arn=certificate.certificate_arn, ),
            'systemctl start nitro-enclaves-acm.service',
            'systemctl enable nitro-enclaves-acm',
        )

        instance = ec2.Instance(
            self,
            'Instance',
            role=role,
            vpc=vpc,
            user_data=user_data,
            # AWS Marketplace AMI: AWS Certificate Manager for Nitro Enclaves
            # Source: https://aws.amazon.com/marketplace/server/configuration?productId=3f5ee4f8-1439-4bce-ac57-e794a4ca82f9&ref_=psb_cfg_continue
            machine_image=ec2.MachineImage.lookup(
                name='ACM-For-Nitro-Enclaves-*',
                owners=['679593333241'],
            ),
            # Nitro Enclaves requires at least 4 vCPUs and does not run on Graviton
            instance_type=ec2.InstanceType.of(
                instance_class=ec2.InstanceClass.COMPUTE5_AMD,
                instance_size=ec2.InstanceSize.XLARGE,
            ),
        )

        # EnclaveOptions is not exposed by the CDK L2 Instance construct, so set it on the underlying CfnInstance
        instance.instance.enclave_options = {'enabled': True}
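
        # Equivalent escape-hatch sketch (commented out so the option is not set
        # twice): override the raw CloudFormation property on the L1 resource.
        # instance.instance.add_property_override('EnclaveOptions.Enabled', True)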

        # Allow inbound HTTPS requests
        instance.connections.allow_from_any_ipv4(ec2.Port.tcp(443))

        # CDK route53 construct does not support EC2 instance as target
        route53.CfnRecordSet(
            self,
            'DnsRecord',
            name=subdomain,
            type='A',
            ttl='60',
            resource_records=[instance.instance_public_ip],
            hosted_zone_id=zone.hosted_zone_id,
        )
Exemple #22
0
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # store
        dynamodb_table = dynamodb.Table(
            self,
            'dynamodb_table',
            table_name=f'{PROJECT}_{STAGE}',
            partition_key=dynamodb.Attribute(
                name='date', type=dynamodb.AttributeType.STRING),
            billing_mode=dynamodb.BillingMode.PAY_PER_REQUEST,
            point_in_time_recovery=False,
            removal_policy=core.RemovalPolicy.DESTROY,
            server_side_encryption=True,
        )

        # public api
        public_api = appsync.CfnGraphQLApi(
            self,
            'public_api',
            name=f'{PROJECT}_{STAGE}',
            authentication_type='API_KEY',
        )

        now = time.localtime()
        epoch = time.mktime(now)
        public_api_key = appsync.CfnApiKey(
            self,
            'public_api_key',
            api_id=public_api.attr_api_id,
            expires=epoch + core.Duration.days(90).to_seconds(),
        )
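
        # `expires` is a Unix epoch timestamp, so the key above stays valid for
        # roughly 90 days from the time the stack is synthesized.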

        with open('schema.gql', mode='r') as f:
            graphql_schema = f.read()

            appsync.CfnGraphQLSchema(self,
                                     'public_api_schema',
                                     api_id=public_api.attr_api_id,
                                     definition=graphql_schema)

        public_api_role = iam.Role(
            self,
            'public_api_role',
            assumed_by=iam.ServicePrincipal('appsync.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'AmazonDynamoDBFullAccess')
            ],
        )

        public_api_datasource = appsync.CfnDataSource(
            self,
            'public_api_datasource',
            api_id=public_api.attr_api_id,
            name=f'{PROJECT}_{STAGE}_dynamodb',
            type='AMAZON_DYNAMODB',
            dynamo_db_config={
                'awsRegion': 'us-east-1',
                'tableName': dynamodb_table.table_name,
            },
            service_role_arn=public_api_role.role_arn,
        )

        with open('mapping_templates/get_holiday.json', mode='r') as f:
            get_holiday_json = f.read()

            appsync.CfnResolver(
                self,
                'public_api_resolver_get_holiday',
                api_id=public_api.attr_api_id,
                type_name='Query',
                field_name='getHoliday',
                data_source_name=public_api_datasource.attr_name,
                kind='UNIT',
                request_mapping_template=get_holiday_json,
                response_mapping_template='$util.toJson($context.result)',
            )

        with open('mapping_templates/list_holidays.json', mode='r') as f:
            list_holidays_json = f.read()

            appsync.CfnResolver(
                self,
                'public_api_resolver_list_holidays',
                api_id=public_api.attr_api_id,
                type_name='Query',
                field_name='listHolidays',
                data_source_name=public_api_datasource.attr_name,
                kind='UNIT',
                request_mapping_template=list_holidays_json,
                response_mapping_template='$util.toJson($context.result)',
            )

        # lambda source code upload to s3
        lambda_assets = s3_assets.Asset(self,
                                        'lambda_assets',
                                        path='./function/.artifact/')

        # update function
        func_api = lambda_.Function(
            self,
            f'{PROJECT}-{STAGE}-func',
            function_name=f'{PROJECT}-{STAGE}-func',
            code=lambda_.Code.from_bucket(bucket=lambda_assets.bucket,
                                          key=lambda_assets.s3_object_key),
            handler='app.handler',
            runtime=lambda_.Runtime.PYTHON_3_7,
            timeout=core.Duration.seconds(120),
            log_retention=logs.RetentionDays.SIX_MONTHS,
            memory_size=128,
            tracing=lambda_.Tracing.ACTIVE,
        )
        func_api.add_environment('TABLE_NAME', dynamodb_table.table_name)
        func_api.add_environment('CSV_URL', CSV_URL)
        func_api.add_to_role_policy(
            iam.PolicyStatement(
                actions=[
                    'dynamodb:Get*',
                    'dynamodb:Put*',
                    'dynamodb:Batch*',
                ],
                resources=[dynamodb_table.table_arn],
            ))

        # schedule execute
        events.Rule(
            self,
            f'{PROJECT}-{STAGE}-schedule',
            enabled=True,
            schedule=events.Schedule.rate(core.Duration.days(10)),
            targets=[events_targets.LambdaFunction(func_api)],
        )

        # lambda@edge
        func_lambdaedge = lambda_.Function(
            self,
            f'{PROJECT}-{STAGE}-func-lambdaedge',
            function_name=f'{PROJECT}-{STAGE}-func-lambdaedge',
            code=lambda_.Code.from_inline(
                open('./function/src/lambdaedge.py').read().replace(
                    '__X_API_KEY__', public_api_key.attr_api_key)),
            handler='index.handler',
            runtime=lambda_.Runtime.PYTHON_3_7,
            timeout=core.Duration.seconds(30),
            memory_size=128,
            role=iam.Role(
                self,
                f'{PROJECT}-{STAGE}-func-lambdaedge-role',
                assumed_by=iam.CompositePrincipal(
                    iam.ServicePrincipal('edgelambda.amazonaws.com'),
                    iam.ServicePrincipal('lambda.amazonaws.com'),
                ),
                managed_policies=[
                    iam.ManagedPolicy.from_aws_managed_policy_name(
                        'service-role/AWSLambdaBasicExecutionRole'),
                ],
            ),
        )
        lambdaedge_version = func_lambdaedge.add_version(
            hashlib.sha256(
                open('./function/src/lambdaedge.py').read().replace(
                    '__X_API_KEY__',
                    public_api_key.attr_api_key).encode()).hexdigest())

        # ACM
        certificates = acm.Certificate(
            self,
            'certificates',
            domain_name=DOMAIN,
            validation_method=acm.ValidationMethod.DNS,
        )

        # CDN
        cdn = cloudfront.CloudFrontWebDistribution(
            self,
            f'{PROJECT}-{STAGE}-cloudfront',
            origin_configs=[
                cloudfront.SourceConfiguration(
                    behaviors=[
                        # default behavior
                        cloudfront.Behavior(
                            allowed_methods=cloudfront.
                            CloudFrontAllowedMethods.ALL,
                            default_ttl=core.Duration.seconds(0),
                            max_ttl=core.Duration.seconds(0),
                            min_ttl=core.Duration.seconds(0),
                            is_default_behavior=True,
                            lambda_function_associations=[
                                cloudfront.LambdaFunctionAssociation(
                                    event_type=cloudfront.LambdaEdgeEventType.
                                    ORIGIN_REQUEST,
                                    lambda_function=lambdaedge_version,
                                ),
                            ])
                    ],
                    custom_origin_source=cloudfront.CustomOriginConfig(
                        domain_name=core.Fn.select(
                            2, core.Fn.split('/',
                                             public_api.attr_graph_ql_url)), ),
                )
            ],
            alias_configuration=cloudfront.AliasConfiguration(
                acm_cert_ref=certificates.certificate_arn,
                names=[DOMAIN],
                security_policy=cloudfront.SecurityPolicyProtocol.
                TLS_V1_2_2018,
            ),
            price_class=cloudfront.PriceClass.PRICE_CLASS_ALL,
        )
        core.CfnOutput(
            self,
            'cloudfront-domain',
            value=cdn.domain_name,
        )
Exemple #23
0
    def __init__(self, scope: core.Construct, id: str, vpc: ec2.IVpc,
                 cluster: neptune.CfnDBCluster, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        role = iam.Role(
            self,
            'Ec2Role',
            assumed_by=iam.ServicePrincipal('ec2.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'AmazonSSMManagedInstanceCore'),
            ],
        )

        config_asset = s3_assets.Asset(
            self,
            'ConfigYaml',
            path='./files/neptune-remote.yaml',
            readers=[role],
        )

        sg = ec2.SecurityGroup(
            self,
            'SecurityGroup',
            vpc=vpc,
        )

        user_data = ec2.UserData.for_linux()
        user_data.add_commands(
            'yum update -y',
            'yum install -y java-1.8.0-devel',
            'cd ~',  # Execute subsequent commands in home directory
            'wget https://archive.apache.org/dist/tinkerpop/3.4.1/apache-tinkerpop-gremlin-console-3.4.1-bin.zip',
            'unzip apache-tinkerpop-gremlin-console-3.4.1-bin.zip',
            'cd apache-tinkerpop-gremlin-console-3.4.1',
            'wget https://www.amazontrust.com/repository/SFSRootCAG2.pem',
            'aws s3 cp s3://{bucket}/{key} conf/neptune-remote.yaml'.format(
                bucket=config_asset.s3_bucket_name,
                key=config_asset.s3_object_key,
            ),
            'sed -i "s/ENDPOINT_URL/{endpoint_url}/g" conf/neptune-remote.yaml'
            .format(endpoint_url=cluster.endpoint, ),
            'systemctl start awslogsd',
        )

        ec2.Instance(
            self,
            'Instance',
            role=role,
            vpc=vpc,
            security_group=sg,
            user_data=user_data,
            instance_type=ec2.InstanceType.of(
                instance_class=ec2.InstanceClass.BURSTABLE3_AMD,
                instance_size=ec2.InstanceSize.NANO,
            ),
            machine_image=ec2.AmazonLinuxImage(
                generation=ec2.AmazonLinuxGeneration.AMAZON_LINUX_2, ),
        )

        self.role = role
        self.security_group = sg
Exemple #24
0
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # IAM resources

        function_role = iam.Role(
            self, 'LambdaRole',
            assumed_by=iam.ServicePrincipal('lambda.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AWSLambdaBasicExecutionRole'),
            ],
        )
        function_role.add_to_policy(
            iam.PolicyStatement(
                actions=['ec2:TerminateInstances'],
                resources=['*'],
            )
        )

        instance_role = iam.Role(
            self, 'Ec2Role',
            assumed_by=iam.ServicePrincipal('ec2.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name('AmazonSSMManagedInstanceCore'),
                iam.ManagedPolicy.from_aws_managed_policy_name('CloudWatchAgentAdminPolicy'),
            ],
        )

        # Lambda resources

        function = lambda_.Function(
            self, 'Shutdown',
            runtime=lambda_.Runtime.PYTHON_3_7,  # Current version on my machines
            code=lambda_.Code.from_asset('files/shutdown'),
            handler='index.handler',
            role=function_role,
        )

        # Log resources

        awslogs_config = s3_assets.Asset(
            self, 'AwslogsConfig',
            path='./files/awslogs.conf',
            readers=[instance_role],
        )

        log_group = logs.LogGroup(
            self, 'LogSecure',
            removal_policy=core.RemovalPolicy.DESTROY,
        )

        logs.SubscriptionFilter(
            self, 'SshdSession',
            log_group=log_group,
            filter_pattern=logs.FilterPattern.all_terms('sshd', 'session opened'),
            destination=logs_destinations.LambdaDestination(function)
        )

        # EC2 resources

        vpc = ec2.Vpc(
            self, 'Vpc',
            cidr='10.0.0.0/24',
            subnet_configuration=[
                ec2.SubnetConfiguration(
                    name='Public',
                    subnet_type=ec2.SubnetType.PUBLIC,
                )
            ],
        )

        key_pair = core.CfnParameter(
            self, 'KeyPair',
            type='AWS::EC2::KeyPair::KeyName',
        )

        # https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/EC2NewInstanceCWL.html
        user_data = ec2.UserData.for_linux()
        user_data.add_commands(
            'yum update -y',
            'yum install -y awslogs',
            'aws s3 cp s3://{bucket}/{key} /etc/awslogs/awslogs.conf'.format(
                bucket=awslogs_config.s3_bucket_name,
                key=awslogs_config.s3_object_key,
            ),
            'sed -i "s/LOG_GROUP_NAME/{log_group_name}/g" /etc/awslogs/awslogs.conf'.format(
                log_group_name=log_group.log_group_name,
            ),
            'sed -i "s/us-east-1/{region}/g" /etc/awslogs/awscli.conf'.format(
                region=self.region,
            ),
            'systemctl start awslogsd',
        )

        # Use an Auto Scaling group so the instances can be replaced via rolling updates
        asg = autoscaling.AutoScalingGroup(
            self, 'Instance',
            role=instance_role,
            vpc=vpc,
            user_data=user_data,
            key_name=key_pair.value_as_string,
            instance_type=ec2.InstanceType.of(
                instance_class=ec2.InstanceClass.BURSTABLE4_GRAVITON,
                instance_size=ec2.InstanceSize.NANO,
            ),
            machine_image=ec2.AmazonLinuxImage(
              generation=ec2.AmazonLinuxGeneration.AMAZON_LINUX_2,
              edition=ec2.AmazonLinuxEdition.STANDARD,
              cpu_type=ec2.AmazonLinuxCpuType.ARM_64,
            ),
            min_capacity=3,
            max_capacity=3,
            update_type=autoscaling.UpdateType.ROLLING_UPDATE,
            rolling_update_configuration=autoscaling.RollingUpdateConfiguration(
                max_batch_size=3,
            )
        )

        asg.connections.allow_from_any_ipv4(ec2.Port.tcp(22))
Exemple #25
0
    def __init__(self, scope: core.Construct, construct_id: str,
                 stream: kinesis.IStream, kda_path: str,
                 database: timestream.CfnDatabase, table: timestream.CfnTable,
                 **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        asset = assets.Asset(self, "flink-source", path=kda_path)

        log_group = logs.LogGroup(self,
                                  "KdaLogGroup",
                                  retention=RetentionDays.FIVE_DAYS,
                                  removal_policy=RemovalPolicy.DESTROY)
        log_stream = log_group.add_stream("KdaLogStream")

        kda_role = iam.Role(
            self,
            "KdaRole",
            assumed_by=iam.ServicePrincipal("kinesisanalytics.amazonaws.com"),
        )

        asset.grant_read(kda_role)
        stream.grant_read(kda_role)
        cloudwatch.Metric.grant_put_metric_data(kda_role)
        log_group.grant(kda_role, "logs:DescribeLogStreams")
        log_group.grant_write(kda_role)

        kda_role.add_to_policy(
            iam.PolicyStatement(actions=[
                "timestream:DescribeEndpoints",
                "timestream:ListTables",
                "timestream:ListDatabases",
                "timestream:DescribeTable",
                "timestream:DescribeDatabase",
            ],
                                resources=["*"]))

        kda_role.add_to_policy(
            iam.PolicyStatement(actions=["timestream:*Database"],
                                resources=[database.attr_arn]))

        kda_role.add_to_policy(
            iam.PolicyStatement(
                actions=["timestream:*Table", "timestream:WriteRecords"],
                resources=[table.attr_arn]))

        kda_role.add_to_policy(
            iam.PolicyStatement(actions=["kms:DescribeKey"], resources=["*"]))

        kda_role.add_to_policy(
            iam.PolicyStatement(actions=["kms:CreateGrant"],
                                resources=["*"],
                                conditions={
                                    "ForAnyValue:StringEquals": {
                                        "kms:EncryptionContextKeys":
                                        "aws:timestream:database-name"
                                    },
                                    "Bool": {
                                        "kms:GrantIsForAWSResource": True
                                    },
                                    "StringLike": {
                                        "kms:ViaService":
                                        "timestream.*.amazonaws.com"
                                    }
                                }))

        kda_role.add_to_policy(
            iam.PolicyStatement(actions=["kinesis:ListShards"],
                                resources=[stream.stream_arn]))

        self._asset = asset
        self._kda_role = kda_role
        self._log_group_name = log_group.log_group_name
        self._log_stream_name = log_stream.log_stream_name

    def __init__(self, scope: cdk.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # Version of ParallelCluster for Cloud9.
        pcluster_version = cdk.CfnParameter(
            self,
            'ParallelClusterVersion',
            description=
            'Specify a custom parallelcluster version. See https://pypi.org/project/aws-parallelcluster/#history for options.',
            default='2.8.0',
            type='String',
            allowed_values=get_version_list('aws-parallelcluster'))

        # S3 URI for Config file
        config = cdk.CfnParameter(
            self,
            'ConfigS3URI',
            description='Set a custom parallelcluster config file.',
            default=
            'https://notearshpc-quickstart.s3.amazonaws.com/{0}/config.ini'.
            format(__version__))

        # Password
        password = cdk.CfnParameter(
            self,
            'UserPasswordParameter',
            description='Set a password for the hpc-quickstart user',
            no_echo=True)

        # create a VPC
        vpc = ec2.Vpc(
            self,
            'VPC',
            cidr='10.0.0.0/16',
            gateway_endpoints={
                "S3":
                ec2.GatewayVpcEndpointOptions(
                    service=ec2.GatewayVpcEndpointAwsService.S3),
                "DynamoDB":
                ec2.GatewayVpcEndpointOptions(
                    service=ec2.GatewayVpcEndpointAwsService.DYNAMODB)
            },
            max_azs=99)

        # The VPC above creates one public and one private subnet per AZ; select the private ones
        selection = vpc.select_subnets(subnet_type=ec2.SubnetType.PRIVATE)

        # Output created subnets
        for i, public_subnet in enumerate(vpc.public_subnets):
            cdk.CfnOutput(self,
                          'PublicSubnet%i' % i,
                          value=public_subnet.subnet_id)

        for i, private_subnet in enumerate(vpc.private_subnets):
            cdk.CfnOutput(self,
                          'PrivateSubnet%i' % i,
                          value=private_subnet.subnet_id)

        cdk.CfnOutput(self, 'VPCId', value=vpc.vpc_id)

        # Create a Bucket
        data_bucket = s3.Bucket(self, "DataRepository")
        cdk.CfnOutput(self, 'DataRespository', value=data_bucket.bucket_name)
        cloudtrail_bucket = s3.Bucket(self, "CloudTrailLogs")
        quickstart_bucket = s3.Bucket.from_bucket_name(self,
                                                       'QuickStartBucket',
                                                       'aws-quickstart')

        # Upload the bootstrap script as an S3 asset
        bootstrap_script = assets.Asset(self,
                                        'BootstrapScript',
                                        path='scripts/bootstrap.sh')

        # Upload the ParallelCluster post_install_script as an S3 asset
        pcluster_post_install_script = assets.Asset(
            self,
            'PclusterPostInstallScript',
            path='scripts/post_install_script.sh')

        # Upload the ParallelCluster config file (config.ini) as an S3 asset
        pcluster_config_script = assets.Asset(self,
                                              'PclusterConfigScript',
                                              path='scripts/config.ini')

        # Setup CloudTrail
        cloudtrail.Trail(self, 'CloudTrail', bucket=cloudtrail_bucket)

        # Create a Cloud9 instance
        # Cloud9 doesn't have the ability to provide userdata
        # Because of this we need to use SSM run command
        cloud9_instance = cloud9.Ec2Environment(
            self,
            'ResearchWorkspace',
            vpc=vpc,
            instance_type=ec2.InstanceType(
                instance_type_identifier='c5.large'))
        cdk.CfnOutput(self,
                      'Research Workspace URL',
                      value=cloud9_instance.ide_url)
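
        # Rough sketch only (the real logic lives in the C9Bootstrap Lambda's
        # functions/source/c9bootstrap code, which is not shown here): since Cloud9
        # has no user data, the bootstrap Lambda would run something like
        #   boto3.client('ssm').send_command(
        #       InstanceIds=[cloud9_ec2_instance_id],
        #       DocumentName='AWS-RunShellScript',
        #       Parameters={'commands': ['bash /tmp/bootstrap.sh']})
        # against the Cloud9 EC2 instance; `cloud9_ec2_instance_id` and the command
        # list are placeholders.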

        # Create a keypair in lambda and store the private key in SecretsManager
        c9_createkeypair_role = iam.Role(
            self,
            'Cloud9CreateKeypairRole',
            assumed_by=iam.ServicePrincipal('lambda.amazonaws.com'))
        c9_createkeypair_role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'service-role/AWSLambdaBasicExecutionRole'))
        # Add IAM permissions to the lambda role
        c9_createkeypair_role.add_to_policy(
            iam.PolicyStatement(
                actions=['ec2:CreateKeyPair', 'ec2:DeleteKeyPair'],
                resources=['*'],
            ))

        # Lambda for Cloud9 keypair
        c9_createkeypair_lambda = _lambda.Function(
            self,
            'C9CreateKeyPairLambda',
            runtime=_lambda.Runtime.PYTHON_3_6,
            handler='lambda_function.handler',
            timeout=cdk.Duration.seconds(300),
            role=c9_createkeypair_role,
            code=_lambda.Code.asset('functions/source/c9keypair'),
        )

        c9_createkeypair_provider = cr.Provider(
            self,
            "C9CreateKeyPairProvider",
            on_event_handler=c9_createkeypair_lambda)

        c9_createkeypair_cr = cfn.CustomResource(
            self,
            "C9CreateKeyPair",
            provider=c9_createkeypair_provider,
            properties={'ServiceToken': c9_createkeypair_lambda.function_arn})
        #c9_createkeypair_cr.node.add_dependency(instance_id)
        c9_ssh_private_key_secret = secretsmanager.CfnSecret(
            self,
            'SshPrivateKeySecret',
            secret_string=c9_createkeypair_cr.get_att_string('PrivateKey'))

        # The iam policy has a <REGION> parameter that needs to be replaced.
        # We do it programmatically so future versions of the synth'd stack
        # template include all regions.
        with open('iam/ParallelClusterUserPolicy.json') as json_file:
            data = json.load(json_file)
            for s in data['Statement']:
                if s['Sid'] == 'S3ParallelClusterReadOnly':
                    s['Resource'] = []
                    for r in region_info.RegionInfo.regions:
                        s['Resource'].append(
                            'arn:aws:s3:::{0}-aws-parallelcluster*'.format(
                                r.name))

            parallelcluster_user_policy = iam.CfnManagedPolicy(
                self,
                'ParallelClusterUserPolicy',
                policy_document=iam.PolicyDocument.from_json(data))

        # Cloud9 IAM Role
        cloud9_role = iam.Role(
            self,
            'Cloud9Role',
            assumed_by=iam.ServicePrincipal('ec2.amazonaws.com'))
        cloud9_role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'AmazonSSMManagedInstanceCore'))
        cloud9_role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name('AWSCloud9User'))
        cloud9_role.add_managed_policy(
            iam.ManagedPolicy.from_managed_policy_arn(
                self, 'AttachParallelClusterUserPolicy',
                parallelcluster_user_policy.ref))
        cloud9_role.add_to_policy(
            iam.PolicyStatement(resources=['*'],
                                actions=[
                                    'ec2:DescribeInstances',
                                    'ec2:DescribeVolumes', 'ec2:ModifyVolume'
                                ]))
        cloud9_role.add_to_policy(
            iam.PolicyStatement(resources=[c9_ssh_private_key_secret.ref],
                                actions=['secretsmanager:GetSecretValue']))
        cloud9_role.add_to_policy(
            iam.PolicyStatement(
                actions=["s3:Get*", "s3:List*"],
                resources=[
                    "arn:aws:s3:::%s/*" % (data_bucket.bucket_name),
                    "arn:aws:s3:::%s" % (data_bucket.bucket_name)
                ]))

        bootstrap_script.grant_read(cloud9_role)
        pcluster_post_install_script.grant_read(cloud9_role)
        pcluster_config_script.grant_read(cloud9_role)

        # Admin Group
        admin_group = iam.Group(self, 'AdminGroup')
        admin_group.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'AdministratorAccess'))
        admin_group.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'AWSCloud9Administrator'))

        # PowerUser Group
        poweruser_group = iam.Group(self, 'PowerUserGroup')
        poweruser_group.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name('PowerUserAccess'))
        poweruser_group.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'AWSCloud9Administrator'))

        # HPC User
        user = iam.CfnUser(
            self,
            'Researcher',
            groups=[admin_group.node.default_child.ref],
            login_profile=iam.CfnUser.LoginProfileProperty(
                password_reset_required=True,
                password=cdk.SecretValue.cfn_parameter(password).to_string()))

        create_user = cdk.CfnParameter(self,
                                       "CreateUser",
                                       default="false",
                                       type="String",
                                       allowed_values=['true', 'false'
                                                       ]).value_as_string
        user_condition = cdk.CfnCondition(self,
                                          "UserCondition",
                                          expression=cdk.Fn.condition_equals(
                                              create_user, "true"))
        user.cfn_options.condition = user_condition

        cdk.CfnOutput(self,
                      'UserLoginUrl',
                      value="".join([
                          "https://", self.account,
                          ".signin.aws.amazon.com/console"
                      ]),
                      condition=user_condition)
        cdk.CfnOutput(self,
                      'UserName',
                      value=user.ref,
                      condition=user_condition)

        # Cloud9 Setup IAM Role
        cloud9_setup_role = iam.Role(
            self,
            'Cloud9SetupRole',
            assumed_by=iam.ServicePrincipal('lambda.amazonaws.com'))
        cloud9_setup_role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'service-role/AWSLambdaBasicExecutionRole'))
        # Allow pcluster to be run in bootstrap
        cloud9_setup_role.add_managed_policy(
            iam.ManagedPolicy.from_managed_policy_arn(
                self, 'AttachParallelClusterUserPolicySetup',
                parallelcluster_user_policy.ref))

        # Add IAM permissions to the lambda role
        cloud9_setup_role.add_to_policy(
            iam.PolicyStatement(
                actions=[
                    'cloudformation:DescribeStackResources',
                    'ec2:AssociateIamInstanceProfile',
                    'ec2:AuthorizeSecurityGroupIngress',
                    'ec2:DescribeInstances',
                    'ec2:DescribeInstanceStatus',
                    'ec2:DescribeInstanceAttribute',
                    'ec2:DescribeIamInstanceProfileAssociations',
                    'ec2:DescribeVolumes',
                    'ec2:DescribeVolumeAttribute',
                    'ec2:DescribeVolumesModifications',
                    'ec2:DescribeVolumeStatus',
                    'ssm:DescribeInstanceInformation',
                    'ec2:ModifyVolume',
                    'ec2:ReplaceIamInstanceProfileAssociation',
                    'ec2:ReportInstanceStatus',
                    'ssm:SendCommand',
                    'ssm:GetCommandInvocation',
                    's3:GetObject',
                    'lambda:AddPermission',
                    'lambda:RemovePermission',
                    'events:PutRule',
                    'events:DeleteRule',
                    'events:PutTargets',
                    'events:RemoveTargets',
                    'cloud9:CreateEnvironmentMembership',
                ],
                resources=['*'],
            ))

        cloud9_setup_role.add_to_policy(
            iam.PolicyStatement(actions=['iam:PassRole'],
                                resources=[cloud9_role.role_arn]))

        cloud9_setup_role.add_to_policy(
            iam.PolicyStatement(
                actions=['lambda:AddPermission', 'lambda:RemovePermission'],
                resources=['*']))

        # Cloud9 Instance Profile
        c9_instance_profile = iam.CfnInstanceProfile(
            self, "Cloud9InstanceProfile", roles=[cloud9_role.role_name])

        # Lambda to add Instance Profile to Cloud9
        c9_instance_profile_lambda = _lambda.Function(
            self,
            'C9InstanceProfileLambda',
            runtime=_lambda.Runtime.PYTHON_3_6,
            handler='lambda_function.handler',
            timeout=cdk.Duration.seconds(900),
            role=cloud9_setup_role,
            code=_lambda.Code.asset('functions/source/c9InstanceProfile'),
        )

        c9_instance_profile_provider = cr.Provider(
            self,
            "C9InstanceProfileProvider",
            on_event_handler=c9_instance_profile_lambda,
        )

        instance_id = cfn.CustomResource(self,
                                         "C9InstanceProfile",
                                         provider=c9_instance_profile_provider,
                                         properties={
                                             'InstanceProfile':
                                             c9_instance_profile.ref,
                                             'Cloud9Environment':
                                             cloud9_instance.environment_id,
                                         })
        instance_id.node.add_dependency(cloud9_instance)

        # Lambda for Cloud9 Bootstrap
        c9_bootstrap_lambda = _lambda.Function(
            self,
            'C9BootstrapLambda',
            runtime=_lambda.Runtime.PYTHON_3_6,
            handler='lambda_function.handler',
            timeout=cdk.Duration.seconds(900),
            role=cloud9_setup_role,
            code=_lambda.Code.asset('functions/source/c9bootstrap'),
        )

        c9_bootstrap_provider = cr.Provider(
            self, "C9BootstrapProvider", on_event_handler=c9_bootstrap_lambda)

        c9_bootstrap_cr = cfn.CustomResource(
            self,
            "C9Bootstrap",
            provider=c9_bootstrap_provider,
            properties={
                'Cloud9Environment':
                cloud9_instance.environment_id,
                'BootstrapPath':
                's3://%s/%s' % (bootstrap_script.s3_bucket_name,
                                bootstrap_script.s3_object_key),
                'Config':
                config,
                'VPCID':
                vpc.vpc_id,
                'MasterSubnetID':
                vpc.public_subnets[0].subnet_id,
                'ComputeSubnetID':
                vpc.private_subnets[0].subnet_id,
                'PostInstallScriptS3Url':
                "".join([
                    's3://', pcluster_post_install_script.s3_bucket_name, "/",
                    pcluster_post_install_script.s3_object_key
                ]),
                'PostInstallScriptBucket':
                pcluster_post_install_script.s3_bucket_name,
                'S3ReadWriteResource':
                data_bucket.bucket_arn,
                'S3ReadWriteUrl':
                's3://%s' % (data_bucket.bucket_name),
                'KeyPairId':
                c9_createkeypair_cr.ref,
                'KeyPairSecretArn':
                c9_ssh_private_key_secret.ref,
                'UserArn':
                user.attr_arn,
                'PclusterVersion':
                pcluster_version.value_as_string
            })
        c9_bootstrap_cr.node.add_dependency(instance_id)
        c9_bootstrap_cr.node.add_dependency(c9_createkeypair_cr)
        c9_bootstrap_cr.node.add_dependency(c9_ssh_private_key_secret)
        c9_bootstrap_cr.node.add_dependency(data_bucket)

        enable_budget = cdk.CfnParameter(self,
                                         "EnableBudget",
                                         default="true",
                                         type="String",
                                         allowed_values=['true', 'false'
                                                         ]).value_as_string
        # Budgets
        budget_properties = {
            'budgetType': "COST",
            'timeUnit': "ANNUALLY",
            'budgetLimit': {
                'amount':
                cdk.CfnParameter(
                    self,
                    'BudgetLimit',
                    description=
                    'The initial budget for this project in USD ($).',
                    default=2000,
                    type='Number').value_as_number,
                'unit':
                "USD",
            },
            'costFilters': None,
            'costTypes': {
                'includeCredit': False,
                'includeDiscount': True,
                'includeOtherSubscription': True,
                'includeRecurring': True,
                'includeRefund': True,
                'includeSubscription': True,
                'includeSupport': True,
                'includeTax': True,
                'includeUpfront': True,
                'useAmortized': False,
                'useBlended': False,
            },
            'plannedBudgetLimits': None,
            'timePeriod': None,
        }

        email = {
            'notification': {
                'comparisonOperator': "GREATER_THAN",
                'notificationType': "ACTUAL",
                'threshold': 80,
                'thresholdType': "PERCENTAGE",
            },
            'subscribers': [{
                'address':
                cdk.CfnParameter(
                    self,
                    'NotificationEmail',
                    description=
                    'This email address will receive billing alarm notifications when 80% of the budget limit is reached.',
                    default='*****@*****.**').value_as_string,
                'subscriptionType':
                "EMAIL",
            }]
        }

        overall_budget = budgets.CfnBudget(
            self,
            "HPCBudget",
            budget=budget_properties,
            notifications_with_subscribers=[email],
        )
        overall_budget.cfn_options.condition = cdk.CfnCondition(
            self,
            "BudgetCondition",
            expression=cdk.Fn.condition_equals(enable_budget, "true"))
Exemple #27
0
    def __init__(
        self,
        scope: cdk.Construct,
        construct_id: str,
        stack_log_level: str,
        vpc,
        my_sql_db_sg,
        store_events_db_endpoint,
        sales_events_bkt,
        _glue_etl_role,
        glue_db_name: str,
        glue_table_name: str,
        tgt_db_secret,
        **kwargs,
    ) -> None:
        super().__init__(scope, construct_id, **kwargs)

        self.template_options.metadata = {"License": "Miztiik Corp."}

        # Add permissions to our Glue job role to access the target DB secret
        tgt_db_secret.grant_read(_glue_etl_role)

        # Create Glue JDBC Connection for RDS MySQL

        # Allow ALL PORTS within SG for GLUE Connections to connect
        # https://docs.aws.amazon.com/glue/latest/dg/connection-defining.html#connection-properties-jdbc
        # https://docs.aws.amazon.com/glue/latest/dg/setup-vpc-for-glue-access.html
        # https://docs.amazonaws.cn/en_us/glue/latest/dg/connection-defining.html

        rds_mysql_conn_props = _glue.CfnConnection.ConnectionInputProperty(
            connection_type="JDBC",
            description="Glue Connection for RDS MySQL Store Events Database",
            name="rdsMySQL57Conn",
            physical_connection_requirements=_glue.CfnConnection.PhysicalConnectionRequirementsProperty(
                security_group_id_list=[my_sql_db_sg.security_group_id],
                subnet_id=vpc.select_subnets(
                        subnet_type=_ec2.SubnetType.PRIVATE
                ).subnet_ids[1]
            ),
            connection_properties={
                "JDBC_CONNECTION_URL": f"jdbc:mysql://{store_events_db_endpoint}:3306/store_events",
                "JDBC_ENFORCE_SSL": "false",
                "USERNAME": "******",
                "PASSWORD": "******"
            }
        )

        rds_mysql_conn = _glue.CfnConnection(
            self,
            "rdsMySQLGlueConnection",
            catalog_id=f"{cdk.Aws.ACCOUNT_ID}",
            connection_input=rds_mysql_conn_props
        )
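
        # The Glue job below references this connection by name; the connection's
        # subnet and security group are what let the job reach the RDS endpoint
        # inside the VPC.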

        # Create the Glue job to ingest JSON data from S3 into RDS
        # Read the Glue Spark job code
        try:
            with open(
                "stacks/back_end/glue_stacks/glue_job_scripts/load_json_to_rds.py",
                encoding="utf-8",
                mode="r",
            ) as f:
                load_json_to_rds = f.read()
        except OSError:
            print("Unable to read Glue Job Code")
            raise

        etl_script_asset = _s3_assets.Asset(
            self,
            "etlScriptAsset",
            path="stacks/back_end/glue_stacks/glue_job_scripts/load_json_to_rds.py"
        )

        self.etl_prefix = "stream-etl"
        _glue_etl_job = _glue.CfnJob(
            self,
            "glues3ToRdsIngestorJob",
            name="s3-to-rds-ingestor",
            description="Glue Job to ingest JSON data from S3 to RDS",
            role=_glue_etl_role.role_arn,
            glue_version="2.0",
            command=_glue.CfnJob.JobCommandProperty(
                name="glueetl",
                script_location=f"s3://{etl_script_asset.s3_bucket_name}/{etl_script_asset.s3_object_key}",
                python_version="3"
            ),
            connections={"connections": [rds_mysql_conn_props.name]},
            default_arguments={
                "--enable-metrics": True,
                "--enable-continuous-cloudwatch-log": True,
                "--job-bookmark-option": "job-bookmark-enable",
                '--TempDir': f"s3://{sales_events_bkt.bucket_name}/bookmarks",
                "--src_db_name": glue_db_name,
                "--src_etl_bkt": f"{sales_events_bkt.bucket_name}",
                "--crawler_tbl_prefix": "txns_",
                "--tgt_db_secret_arn": tgt_db_secret.secret_arn,
                "--tgt_tbl_name": glue_table_name,
                "--conn_name": f"{rds_mysql_conn_props.name}"
            },
            allocated_capacity=1,
            # timeout=2,
            max_retries=2,
            execution_property=_glue.CfnJob.ExecutionPropertyProperty(
                max_concurrent_runs=2)
        )

        # Configure a Trigger - Every hour
        _glue_etl_job_trigger = _glue.CfnTrigger(
            self,
            "glueEtlJobtrigger",
            type="SCHEDULED",
            description="Miztiik Automation: Trigger S3 to RDS Ingestor glue job every hour",
            schedule="cron(0 1 * * ? *)",
            start_on_creation=False,
            actions=[
                _glue.CfnTrigger.ActionProperty(
                    job_name=f"{_glue_etl_job.name}",
                    timeout=2
                )
            ]
        )
        _glue_etl_job_trigger.add_depends_on(_glue_etl_job)

        # Configure Glue Workflow
        _glue_etl_job_workflow = _glue.CfnWorkflow(
            self,
            "glueEtlJobWorkflow"
        )

        ###########################################
        ################# OUTPUTS #################
        ###########################################
        output_0 = cdk.CfnOutput(
            self,
            "AutomationFrom",
            value=f"{GlobalArgs.SOURCE_INFO}",
            description="To know more about this automation stack, check out our github page.",
        )

        output_1 = cdk.CfnOutput(
            self,
            "RDSIngestorETLGlueJob",
            value=f"https://console.aws.amazon.com/gluestudio/home?region={cdk.Aws.REGION}#/jobs",
            description="Glue Job to ingest JSON data from S3 to RDS.",
        )
Exemple #28
0
    def __init__(self, scope: core.Construct, id: str, props, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        dirname = os.path.dirname(__file__)

        ecr_repo = ecr.Repository.from_repository_name(
            self,
            'UmccriseEcrRepo',
            repository_name='umccrise'
        )

        ################################################################################
        # Set up permissions
        ro_buckets = set()
        for bucket in props['ro_buckets']:
            tmp_bucket = s3.Bucket.from_bucket_name(
                self,
                bucket,
                bucket_name=bucket
            )
            ro_buckets.add(tmp_bucket)

        rw_buckets = set()
        for bucket in props['rw_buckets']:
            tmp_bucket = s3.Bucket.from_bucket_name(
                self,
                bucket,
                bucket_name=bucket
            )
            rw_buckets.add(tmp_bucket)

        batch_service_role = iam.Role(
            self,
            'BatchServiceRole',
            assumed_by=iam.ServicePrincipal('batch.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AWSBatchServiceRole')
            ]
        )

        spotfleet_role = iam.Role(
            self,
            'AmazonEC2SpotFleetRole',
            assumed_by=iam.ServicePrincipal('spotfleet.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AmazonEC2SpotFleetTaggingRole')
            ]
        )

        # Create role for Batch instances
        batch_instance_role = iam.Role(
            self,
            'BatchInstanceRole',
            role_name='UmccriseBatchInstanceRole',
            assumed_by=iam.CompositePrincipal(
                iam.ServicePrincipal('ec2.amazonaws.com'),
                iam.ServicePrincipal('ecs.amazonaws.com')
            ),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AmazonEC2RoleforSSM'),
                iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AmazonEC2ContainerServiceforEC2Role')
            ]
        )
        batch_instance_role.add_to_policy(
            iam.PolicyStatement(
                actions=[
                    "ec2:Describe*",
                    "ec2:AttachVolume",
                    "ec2:CreateVolume",
                    "ec2:CreateTags",
                    "ec2:ModifyInstanceAttribute"
                ],
                resources=["*"]
            )
        )
        batch_instance_role.add_to_policy(
            iam.PolicyStatement(
                actions=[
                    "ecs:ListClusters"
                ],
                resources=["*"]
            )
        )
        for bucket in ro_buckets:
            bucket.grant_read(batch_instance_role)
        for bucket in rw_buckets:
            # restrict writes to paths matching */umccrised/*
            bucket.grant_read_write(batch_instance_role, '*/umccrised/*')

        # Turn the instance role into a Instance Profile
        batch_instance_profile = iam.CfnInstanceProfile(
            self,
            'BatchInstanceProfile',
            instance_profile_name='UmccriseBatchInstanceProfile',
            roles=[batch_instance_role.role_name]
        )

        ################################################################################
        # Network
        # (Import common infrastructure maintained via Terraform)

        # VPC
        vpc = ec2.Vpc.from_lookup(
            self,
            'UmccrMainVpc',
            tags={'Name': 'main-vpc', 'Stack': 'networking'}
        )

        batch_security_group = ec2.SecurityGroup(
            self,
            "BatchSecurityGroup",
            vpc=vpc,
            description="Allow all outbound, no inbound traffic"
        )
        ################################################################################
        # Setup Batch compute resources

        # Configure BlockDevice to expand instance disk space (if needed?)
        block_device_mappings = [
            {
                'deviceName': '/dev/xvdf',
                'ebs': {
                    'deleteOnTermination': True,
                    'encrypted': True,
                    'volumeSize': 2048,
                    'volumeType': 'gp2'
                }
            }
        ]

        # Set up custom user data to configure the Batch instances
        umccrise_wrapper_asset = assets.Asset(
            self,
            'UmccriseWrapperAsset',
            path=os.path.join(dirname, '..', 'assets', "umccrise-wrapper.sh")
        )
        umccrise_wrapper_asset.grant_read(batch_instance_role)

        user_data_asset = assets.Asset(
            self,
            'UserDataAsset',
            path=os.path.join(dirname, '..', 'assets', "batch-user-data.sh")
        )
        user_data_asset.grant_read(batch_instance_role)

        user_data = ec2.UserData.for_linux()
        local_path = user_data.add_s3_download_command(
            bucket=user_data_asset.bucket,
            bucket_key=user_data_asset.s3_object_key
        )
        user_data.add_execute_file_command(
            file_path=local_path,
            arguments=f"s3://{umccrise_wrapper_asset.bucket.bucket_name}/{umccrise_wrapper_asset.s3_object_key}"
        )

        # Wrap the user data in the MIME multi-part archive format required when user data is supplied via a launch template
        mime_wrapper = ec2.UserData.custom('MIME-Version: 1.0')
        mime_wrapper.add_commands('Content-Type: multipart/mixed; boundary="==MYBOUNDARY=="')
        mime_wrapper.add_commands('')
        mime_wrapper.add_commands('--==MYBOUNDARY==')
        mime_wrapper.add_commands('Content-Type: text/x-shellscript; charset="us-ascii"')
        mime_wrapper.add_commands('')
        # install AWS CLI, as it's unexpectedly missing from the AWS Linux 2 AMI...
        mime_wrapper.add_commands('yum -y install unzip')
        mime_wrapper.add_commands('cd /opt')
        mime_wrapper.add_commands('curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"')
        mime_wrapper.add_commands('unzip awscliv2.zip')
        mime_wrapper.add_commands('sudo ./aws/install --bin-dir /usr/bin')
        # insert our actual user data payload
        mime_wrapper.add_commands(user_data.render())
        mime_wrapper.add_commands('--==MYBOUNDARY==--')
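
        # For reference, the wrapper assembled above renders as a MIME multi-part
        # document: the MIME headers and boundary added first, one text/x-shellscript
        # part containing the AWS CLI install commands plus the rendered `user_data`
        # payload, and the closing --==MYBOUNDARY==-- marker. AWS Batch launch
        # templates expect user data in this multi-part format.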

        launch_template = ec2.CfnLaunchTemplate(
            self,
            'UmccriseBatchComputeLaunchTemplate',
            launch_template_name='UmccriseBatchComputeLaunchTemplate',
            launch_template_data={
                'userData': core.Fn.base64(mime_wrapper.render()),
                'blockDeviceMappings': block_device_mappings
            }
        )

        launch_template_spec = batch.LaunchTemplateSpecification(
            launch_template_name=launch_template.launch_template_name,
            version='$Latest'
        )

        my_compute_res = batch.ComputeResources(
            type=(batch.ComputeResourceType.SPOT if props['compute_env_type'].lower() == 'spot' else batch.ComputeResourceType.ON_DEMAND),
            allocation_strategy=batch.AllocationStrategy.BEST_FIT_PROGRESSIVE,
            desiredv_cpus=0,
            maxv_cpus=320,
            minv_cpus=0,
            image=ec2.MachineImage.generic_linux(ami_map={'ap-southeast-2': props['compute_env_ami']}),
            launch_template=launch_template_spec,
            spot_fleet_role=spotfleet_role,
            instance_role=batch_instance_profile.instance_profile_name,
            vpc=vpc,
            vpc_subnets=ec2.SubnetSelection(
                subnet_type=ec2.SubnetType.PRIVATE,
                # availability_zones=["ap-southeast-2a"]
            ),
            security_groups=[batch_security_group]
            # compute_resources_tags=core.Tag('Creator', 'Batch')
        )
        # XXX: How to add more than one tag above??
        # https://github.com/aws/aws-cdk/issues/7350
        # core.Tag.add(my_compute_res, 'Foo', 'Bar')

        my_compute_env = batch.ComputeEnvironment(
            self,
            'UmccriseBatchComputeEnv',
            compute_environment_name="cdk-umccr_ise-batch-compute-env",
            service_role=batch_service_role,
            compute_resources=my_compute_res
        )
        # child = my_compute_env.node.default_child
        # child_comp_res = child.compute_resources
        # child_comp_res.tags = "{'Foo': 'Bar'}"

        job_queue = batch.JobQueue(
            self,
            'UmccriseJobQueue',
            job_queue_name='cdk-umccrise_job_queue',
            compute_environments=[
                batch.JobQueueComputeEnvironment(
                    compute_environment=my_compute_env,
                    order=1
                )
            ],
            priority=10
        )

        job_container = batch.JobDefinitionContainer(
            image=ecs.ContainerImage.from_registry(name=props['container_image']),
            vcpus=32,
            memory_limit_mib=100000,
            command=[
                "/opt/container/umccrise-wrapper.sh",
                "Ref::vcpus"
            ],
            mount_points=[
                ecs.MountPoint(
                    container_path='/work',
                    read_only=False,
                    source_volume='work'
                ),
                ecs.MountPoint(
                    container_path='/opt/container',
                    read_only=True,
                    source_volume='container'
                )
            ],
            volumes=[
                ecs.Volume(
                    name='container',
                    host=ecs.Host(
                        source_path='/opt/container'
                    )
                ),
                ecs.Volume(
                    name='work',
                    host=ecs.Host(
                        source_path='/mnt'
                    )
                )
            ],
            privileged=True
        )

        job_definition = batch.JobDefinition(
            self,
            'UmccriseJobDefinition',
            job_definition_name='cdk-umccrise-job-definition',
            parameters={'vcpus': '1'},
            container=job_container,
            timeout=core.Duration.hours(5)
        )
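        # Note: "Ref::vcpus" in the container command is replaced at submission time with
        # the job's 'vcpus' parameter (default '1' above); a submitter can override it via
        # the `parameters` argument of the Batch SubmitJob call.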

        ################################################################################
        # Set up job submission Lambda

        lambda_role = iam.Role(
            self,
            'UmccriseLambdaRole',
            assumed_by=iam.ServicePrincipal('lambda.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AWSLambdaBasicExecutionRole'),
                iam.ManagedPolicy.from_aws_managed_policy_name('AWSBatchFullAccess')  # TODO: restrict!
            ]
        )

        for bucket in ro_buckets:
            bucket.grant_read(lambda_role)
        for bucket in rw_buckets:
            bucket.grant_read(lambda_role)
        ecr_repo.grant(lambda_role, 'ecr:ListImages')

        # TODO: support dev/prod split, i.e. image being configurable on dev, but fixed on prod
        #       may need a default JobDefinition to be set up
        lmbda.Function(
            self,
            'UmccriseLambda',
            function_name='umccrise_batch_lambda',
            handler='umccrise.lambda_handler',
            runtime=lmbda.Runtime.PYTHON_3_7,
            code=lmbda.Code.from_asset('lambdas/umccrise'),
            environment={
                'JOBNAME_PREFIX': "UMCCRISE_",
                'JOBQUEUE': job_queue.job_queue_name,
                'UMCCRISE_MEM': '100000',
                'UMCCRISE_VCPUS': '32',
                'JOBDEF': job_definition.job_definition_name,
                'REFDATA_BUCKET': props['refdata_bucket'],
                'INPUT_BUCKET': props['input_bucket'],
                'RESULT_BUCKET': props['result_bucket'],
                'IMAGE_CONFIGURABLE': props['image_configurable']
            },
            role=lambda_role
        )
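
The Lambda source under `lambdas/umccrise` is not included in this example. A minimal sketch of what such a handler could look like is shown below; it only assumes the environment variables wired up by the stack above and the standard boto3 Batch API. The event field `resultDir` is a hypothetical input used to build the job name.

# Hypothetical sketch of lambdas/umccrise/umccrise.py (not the original handler).
import os

import boto3

batch = boto3.client('batch')


def lambda_handler(event, context):
    # 'resultDir' is an assumed event field; any job-name-safe string works here
    suffix = event.get('resultDir', 'manual').replace('/', '_')
    response = batch.submit_job(
        jobName=os.environ['JOBNAME_PREFIX'] + suffix,
        jobQueue=os.environ['JOBQUEUE'],
        jobDefinition=os.environ['JOBDEF'],
        containerOverrides={
            'vcpus': int(os.environ['UMCCRISE_VCPUS']),
            'memory': int(os.environ['UMCCRISE_MEM']),
        },
        # fills the "Ref::vcpus" placeholder declared on the JobDefinition
        parameters={'vcpus': os.environ['UMCCRISE_VCPUS']},
    )
    return {'jobId': response['jobId'], 'jobName': response['jobName']}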
Exemple #29
0
    def __init__(
        self,
        scope: core.Construct,
        id: str,
        vpc_stack,
        logstash_ec2=True,
        logstash_fargate=True,
        **kwargs,
    ) -> None:
        super().__init__(scope, id, **kwargs)

        # get s3 bucket name
        s3client = boto3.client("s3")
        s3_bucket_list = s3client.list_buckets()
        s3_bucket_name = ""
        for bkt in s3_bucket_list["Buckets"]:
            try:
                bkt_tags = s3client.get_bucket_tagging(
                    Bucket=bkt["Name"])["TagSet"]
                for keypairs in bkt_tags:
                    if (keypairs["Key"] == "aws:cloudformation:stack-name"
                            and keypairs["Value"] == "elkk-athena"):
                        s3_bucket_name = bkt["Name"]
            except ClientError as err:
                if err.response["Error"]["Code"] in [
                        "NoSuchTagSet", "NoSuchBucket"
                ]:
                    pass
                else:
                    print(f"Unexpected error: {err}")

        # get elastic endpoint
        esclient = boto3.client("es")
        es_domains = esclient.list_domain_names()
        try:
            es_domain = [
                dom["DomainName"] for dom in es_domains["DomainNames"]
                if "elkk-" in dom["DomainName"]
            ][0]
            es_endpoint = esclient.describe_elasticsearch_domain(
                DomainName=es_domain)
            es_endpoint = es_endpoint["DomainStatus"]["Endpoints"]["vpc"]
        except IndexError:
            es_endpoint = ""

        # assets for logstash stack
        logstash_yml = assets.Asset(self,
                                    "logstash_yml",
                                    path=os.path.join(dirname, "logstash.yml"))
        logstash_repo = assets.Asset(self,
                                     "logstash_repo",
                                     path=os.path.join(dirname,
                                                       "logstash.repo"))

        # substitute placeholders in the conf file and write it out as an asset
        # (the kafka broker string needs no reformatting; a sketch of the helper
        # follows this example)
        logstash_conf_asset = file_updated(
            os.path.join(dirname, "logstash.conf"),
            {
                "$s3_bucket": s3_bucket_name,
                "$es_endpoint": es_endpoint,
                "$kafka_brokers": kafka_get_brokers(),
                "$elkk_region": os.environ["CDK_DEFAULT_REGION"],
            },
        )
        logstash_conf = assets.Asset(
            self,
            "logstash.conf",
            path=logstash_conf_asset,
        )

        # logstash security group
        logstash_security_group = ec2.SecurityGroup(
            self,
            "logstash_security_group",
            vpc=vpc_stack.get_vpc,
            description="logstash security group",
            allow_all_outbound=True,
        )
        core.Tags.of(logstash_security_group).add("project",
                                                  constants["PROJECT_TAG"])
        core.Tags.of(logstash_security_group).add("Name", "logstash_sg")

        # Open port 22 for SSH
        logstash_security_group.add_ingress_rule(
            ec2.Peer.ipv4(f"{external_ip}/32"),
            ec2.Port.tcp(22),
            "from own public ip",
        )

        # get security group for kafka
        ec2client = boto3.client("ec2")
        security_groups = ec2client.describe_security_groups(Filters=[{
            "Name":
            "tag-value",
            "Values": [constants["PROJECT_TAG"]]
        }], )

        # if kafka sg does not exist ... don't add it
        try:
            kafka_sg_id = [
                sg["GroupId"] for sg in security_groups["SecurityGroups"]
                if "kafka security group" in sg["Description"]
            ][0]
            kafka_security_group = ec2.SecurityGroup.from_security_group_id(
                self, "kafka_security_group", security_group_id=kafka_sg_id)

            # let in logstash
            kafka_security_group.connections.allow_from(
                logstash_security_group,
                ec2.Port.all_traffic(),
                "from logstash",
            )
        except IndexError:
            # print("kafka_sg_id and kafka_security_group not found")
            pass

        # get security group for elastic
        try:
            elastic_sg_id = [
                sg["GroupId"] for sg in security_groups["SecurityGroups"]
                if "elastic security group" in sg["Description"]
            ][0]
            elastic_security_group = ec2.SecurityGroup.from_security_group_id(
                self,
                "elastic_security_group",
                security_group_id=elastic_sg_id)

            # let in logstash
            elastic_security_group.connections.allow_from(
                logstash_security_group,
                ec2.Port.all_traffic(),
                "from logstash",
            )
        except IndexError:
            pass

        # elastic policy
        access_elastic_policy = iam.PolicyStatement(
            effect=iam.Effect.ALLOW,
            actions=[
                "es:ListDomainNames",
                "es:DescribeElasticsearchDomain",
                "es:ESHttpPut",
            ],
            resources=["*"],
        )

        # kafka policy
        access_kafka_policy = iam.PolicyStatement(
            effect=iam.Effect.ALLOW,
            actions=["kafka:ListClusters", "kafka:GetBootstrapBrokers"],
            resources=["*"],
        )

        # s3 policy
        access_s3_policy = iam.PolicyStatement(
            effect=iam.Effect.ALLOW,
            actions=["s3:ListBucket", "s3:PutObject"],
            resources=["*"],
        )

        # create the Logstash instance
        if logstash_ec2:
            # userdata for Logstash
            logstash_userdata = user_data_init(
                log_group_name="elkk/logstash/instance")
            # create the instance
            logstash_instance = ec2.Instance(
                self,
                "logstash_client",
                instance_type=ec2.InstanceType(constants["LOGSTASH_INSTANCE"]),
                machine_image=ec2.AmazonLinuxImage(
                    generation=ec2.AmazonLinuxGeneration.AMAZON_LINUX_2),
                vpc=vpc_stack.get_vpc,
                vpc_subnets=ec2.SubnetSelection(subnet_type=ec2.SubnetType.PUBLIC),
                key_name=constants["KEY_PAIR"],
                security_group=logstash_security_group,
                user_data=logstash_userdata,
            )
            core.Tags.of(logstash_instance).add("project",
                                                constants["PROJECT_TAG"])

            # add access to the file assets
            logstash_yml.grant_read(logstash_instance)
            logstash_repo.grant_read(logstash_instance)
            logstash_conf.grant_read(logstash_instance)

            # add permissions to instance
            logstash_instance.add_to_role_policy(
                statement=access_elastic_policy)
            logstash_instance.add_to_role_policy(statement=access_kafka_policy)
            logstash_instance.add_to_role_policy(statement=access_s3_policy)

            # add log permissions
            instance_add_log_permissions(logstash_instance)

            # add commands to the userdata
            logstash_userdata.add_commands(
                # get setup assets files
                f"aws s3 cp s3://{logstash_yml.s3_bucket_name}/{logstash_yml.s3_object_key} /home/ec2-user/logstash.yml",
                f"aws s3 cp s3://{logstash_repo.s3_bucket_name}/{logstash_repo.s3_object_key} /home/ec2-user/logstash.repo",
                f"aws s3 cp s3://{logstash_conf.s3_bucket_name}/{logstash_conf.s3_object_key} /home/ec2-user/logstash.conf",
                # install java
                "amazon-linux-extras install java-openjdk11 -y",
                # install git
                "yum install git -y",
                # install pip
                "yum install python-pip -y",
                # clone the amazon_es output plugin for logstash
                "git clone https://github.com/awslabs/logstash-output-amazon_es.git /home/ec2-user/logstash-output-amazon_es",
                # logstash
                "rpm --import https://artifacts.elastic.co/GPG-KEY-elasticsearch",
                # move logstash repo file
                "mv -f /home/ec2-user/logstash.repo /etc/yum.repos.d/logstash.repo",
                # install logstash via yum
                "yum install logstash -y",
                # add user to logstash group
                "usermod -a -G logstash ec2-user",
                # move logstash.yml to final location
                "mv -f /home/ec2-user/logstash.yml /etc/logstash/logstash.yml",
                # move logstash.conf to final location
                "mv -f /home/ec2-user/logstash.conf /etc/logstash/conf.d/logstash.conf",
                # move plugin
                "mkdir /usr/share/logstash/plugins",
                "mv -f /home/ec2-user/logstash-output-amazon_es /usr/share/logstash/plugins/logstash-output-amazon_es",
                # update gemfile
                """sed -i '5igem "logstash-output-amazon_es", :path => "/usr/share/logstash/plugins/logstash-output-amazon_es"' /usr/share/logstash/Gemfile""",
                # update ownership
                "chown -R logstash:logstash /etc/logstash",
                # start logstash
                "systemctl start logstash.service",
            )
            # add the signal
            logstash_userdata.add_signal_on_exit_command(
                resource=logstash_instance)

            # add creation policy for instance
            logstash_instance.instance.cfn_options.creation_policy = core.CfnCreationPolicy(
                resource_signal=core.CfnResourceSignal(count=1,
                                                       timeout="PT10M"))

        # fargate for logstash
        if logstash_fargate:
            # cloudwatch log group for containers
            logstash_logs_containers = logs.LogGroup(
                self,
                "logstash_logs_containers",
                log_group_name="elkk/logstash/container",
                removal_policy=core.RemovalPolicy.DESTROY,
                retention=logs.RetentionDays.ONE_WEEK,
            )
            # docker image for logstash
            logstash_image_asset = ecr_assets.DockerImageAsset(
                self,
                "logstash_image_asset",
                directory=dirname  # , file="Dockerfile"
            )

            # create the fargate cluster
            logstash_cluster = ecs.Cluster(self,
                                           "logstash_cluster",
                                           vpc=vpc_stack.get_vpc)
            core.Tags.of(logstash_cluster).add("project", constants["PROJECT_TAG"])

            # the task
            logstash_task = ecs.FargateTaskDefinition(
                self,
                "logstash_task",
                cpu=512,
                memory_limit_mib=1024,
            )

            # add container to the task
            logstash_task.add_container(
                logstash_image_asset.source_hash,
                image=ecs.ContainerImage.from_docker_image_asset(
                    logstash_image_asset),
                logging=ecs.LogDrivers.aws_logs(
                    stream_prefix="elkk", log_group=logstash_logs_containers),
            )

            # add permissions to the task
            logstash_task.add_to_task_role_policy(access_s3_policy)
            logstash_task.add_to_task_role_policy(access_elastic_policy)

            # the service
            logstash_service = ecs.FargateService(
                self,
                "logstash_service",
                cluster=logstash_cluster,
                task_definition=logstash_task,
                security_group=logstash_security_group,
                deployment_controller=ecs.DeploymentController(
                    type=ecs.DeploymentControllerType.ECS),
            )

            # scale the service on CPU utilization
            logstash_scaling = logstash_service.auto_scale_task_count(
                min_capacity=3, max_capacity=10)
            logstash_scaling.scale_on_cpu_utilization(
                "logstash_scaling",
                target_utilization_percent=75,
                scale_in_cooldown=core.Duration.seconds(60),
                scale_out_cooldown=core.Duration.seconds(60),
            )
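
The `file_updated` helper used earlier in this example comes from the surrounding project and is not shown here. A minimal sketch, assuming it simply substitutes the `$...` placeholders in a template and writes the result alongside the original so the copy can be uploaded as an asset, could look like this (an assumption, not the project's actual implementation):

# Sketch of an assumed file_updated helper: substitute placeholders in a template
# file and return the path of the rewritten copy for asset upload.
def file_updated(template_path, substitutions):
    with open(template_path) as f:
        content = f.read()
    for placeholder, value in substitutions.items():
        content = content.replace(placeholder, value)
    updated_path = template_path + ".asset"
    with open(updated_path, "w") as f:
        f.write(content)
    return updated_path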
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        with open('./props/tasksetting.json', 'r') as f1:
            py_json1 = json.load(f1)
            ts = json.dumps(py_json1)

        # with open('./props/mappingrule.json', 'r') as f2:
        #     py_json2 = json.load(f2)
        #     mr = json.dumps(py_json2)

        with open('./props/config.json', 'r') as f2:
            configuration = json.load(f2)

        def getMappingrules(self, table_list):
            rules = []
            for index, value in enumerate(table_list, 1):
                rules.append({
                    "rule-type": "selection",
                    "rule-id": str(index),
                    "rule-name": str(index),
                    "object-locator": {
                        "schema-name": value['schemaName'],
                        "table-name": value['tableName']
                    },
                    "rule-action": "include",
                    "filters": []
                })
            mapping_rules = {"rules": rules}
            return json.dumps(mapping_rules)
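
        # Illustrative output: with table_list = [{'schemaName': 'public', 'tableName': 'orders'}]
        # getMappingrules returns the DMS table-mapping JSON:
        #   {"rules": [{"rule-type": "selection", "rule-id": "1", "rule-name": "1",
        #               "object-locator": {"schema-name": "public", "table-name": "orders"},
        #               "rule-action": "include", "filters": []}]}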

        # The code that defines your stack goes here
        S3Accessrole = _iam.Role(
            self,
            'dmsrole',
            assumed_by=_iam.ServicePrincipal('dms.amazonaws.com'),
            managed_policies=[
                _iam.ManagedPolicy.from_aws_managed_policy_name(
                    'AmazonS3FullAccess')
            ])

        raw_bucket = s3.Bucket(self,
                               'rawbucket',
                               bucket_name='rawbucket-datalake-cdk-oregon')
        raw_bucket.add_lifecycle_rule(
            enabled=configuration['s3LifecycleRule']['enabled'],
            expiration=core.Duration.days(
                configuration['s3LifecycleRule']['expiration']))

        #my_table = ddb.Table(self, id ='dunamoTable', table_name = 'testcdktable',
        #partition_key = ddb.Attribute(name ='lastname',type = ddb.AttributeType.STRING) )

        dl_dms = _dms.CfnReplicationInstance(
            self,
            'dmsreplication',
            replication_instance_class=configuration['DMS_instance_setting']
            ['instance_class'],
            replication_instance_identifier='datalake-instance-cdk',
            allocated_storage=configuration['DMS_instance_setting']
            ['allocated_storage'])

        source_endpoint = _dms.CfnEndpoint(
            self,
            'sourceendpoint',
            endpoint_type='source',
            engine_name=configuration['engineName'],
            database_name=configuration['databaseName'],
            username=configuration['username'],
            password=configuration['password'],
            port=configuration['port'],
            server_name=configuration['serverName'],
        )

        target_endpoint = _dms.CfnEndpoint(
            self,
            'targetendpoint',
            endpoint_type='target',
            engine_name='s3',
            s3_settings={
                'bucketName': raw_bucket.bucket_name,
                'serviceAccessRoleArn': S3Accessrole.role_arn
            },
            extra_connection_attributes='dataFormat=parquet')

        dms_task = _dms.CfnReplicationTask(
            self,
            'data2lake-task',
            migration_type='full-load-and-cdc',
            replication_instance_arn=dl_dms.ref,
            source_endpoint_arn=source_endpoint.ref,
            target_endpoint_arn=target_endpoint.ref,
            replication_task_settings=ts,
            table_mappings=getMappingrules(self, configuration['tableList']))

        my_table = ddb.Table(self,
                             id='dynamoTable',
                             table_name='ControllerTable',
                             partition_key=ddb.Attribute(
                                 name='path', type=ddb.AttributeType.STRING),
                             billing_mode=ddb.BillingMode.PAY_PER_REQUEST)

        datalake_bucket = s3.Bucket(self,
                                    'datalakebucket',
                                    bucket_name='datalake-bucket-cdk-oregon')

        glue_role = _iam.Role(
            self,
            'gluerole',
            assumed_by=_iam.ServicePrincipal('glue.amazonaws.com'),
            managed_policies=[
                _iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AWSGlueServiceRole')
            ])

        raw_bucket.grant_read(glue_role)
        datalake_bucket.grant_read_write(glue_role)

        # Lake Formation settings
        # If the managed policy 'AWSLakeFormationDataAdmin' is attached to your own IAM user,
        # extend it to allow "lakeformation:PutDataLakeSettings" so that the data lake
        # settings below can be applied by the CDK.
        lake_admin_setting = _lakeformation.CfnDataLakeSettings(
            self,
            'data-lake-GrantAdmin',
            admins=[
                _lakeformation.CfnDataLakeSettings.DataLakePrincipalProperty(
                    data_lake_principal_identifier=configuration[
                        'executiveArn'])
            ])

        glue_database = _glue.Database(self,
                                       'gluedatabase',
                                       database_name='data_lake_gluedb')

        glue_database.node.add_dependency(lake_admin_setting)

        glue_role_permission_inLakeFormation = _lakeformation.CfnPermissions(
            self,
            'permission-glueRole',
            data_lake_principal=_lakeformation.CfnPermissions.
            DataLakePrincipalProperty(
                data_lake_principal_identifier=glue_role.role_arn),
            resource=_lakeformation.CfnPermissions.ResourceProperty(
                database_resource=_lakeformation.CfnPermissions.
                DatabaseResourceProperty(name=glue_database.database_name)),
            permissions=['ALL'])

        crawler = _glue.CfnCrawler(
            self,
            'datalakecrawler',
            name='Crawler-datalake-cdk',
            role=glue_role.role_arn,
            targets={
                's3Targets': [{
                    'path':
                    's3://' + datalake_bucket.bucket_name + '/datalake/'
                }]
            },
            database_name='data_lake_gluedb',
            configuration=
            "{\"Version\":1.0,\"CrawlerOutput\":{\"Partitions\":{\"AddOrUpdateBehavior\":\"InheritFromTable\"},\"Tables\":{\"AddOrUpdateBehavior\":\"MergeNewColumns\"}}}"
        )

        initialload_script = S3Assets.Asset(self,
                                            'initial-load-code',
                                            path='./Gluejob/InitialLoad.py')
        incrementalload_script = S3Assets.Asset(
            self, 'incremental-load-code', path='./Gluejob/IncrementalLoad.py')

        initialload_script.grant_read(glue_role)
        incrementalload_script.grant_read(glue_role)
        my_table.grant_full_access(glue_role)

        initial_load_job = _glue.CfnJob(
            self,
            'initial-job',
            name='InitialLoad-cdk',
            command=_glue.CfnJob.JobCommandProperty(
                name='glueetl',
                python_version='3',
                script_location='s3://' + initialload_script.s3_bucket_name +
                '/' + initialload_script.s3_object_key),
            role=glue_role.role_arn,
            default_arguments={
                '--prefix': str(configuration['tableList']),
                '--bucket': raw_bucket.bucket_name,
                '--datalake_bucket': datalake_bucket.bucket_name,
                '--datalake_prefix': 'datalake/',
                '--region': CdkpyStack.of(self).region,
                '--controller_table_name': my_table.table_name
            },
            allocated_capacity=configuration['glue_job_setting']
            ['job_capacity'],
            execution_property=_glue.CfnJob.ExecutionPropertyProperty(
                max_concurrent_runs=configuration['glue_job_setting']
                ['max_concurrent_run_JobExecution']))

        incremental_load_job = _glue.CfnJob(
            self,
            'increment-job',
            name='IncrementalLoad-cdk',
            command=_glue.CfnJob.JobCommandProperty(
                name='glueetl',
                script_location='s3://' +
                incrementalload_script.s3_bucket_name + '/' +
                incrementalload_script.s3_object_key,
                python_version='3'),
            role=glue_role.role_arn,
            default_arguments={
                '--prefix': str(configuration['tableList']),
                '--bucket': raw_bucket.bucket_name,
                '--datalake_bucket': datalake_bucket.bucket_name,
                '--datalake_prefix': 'datalake/',
                '--region': CdkpyStack.of(self).region,
                '--controller_table_name': my_table.table_name
            },
            allocated_capacity=2,
            execution_property=_glue.CfnJob.ExecutionPropertyProperty(
                max_concurrent_runs=1))

        job_trigger = _glue.CfnTrigger(
            self,
            'datalake-glue-trigger',
            type='SCHEDULED',
            schedule=configuration['job_trigger_schedule'],
            start_on_creation=False,
            actions=[
                _glue.CfnTrigger.ActionProperty(job_name='IncrementalLoad-cdk')
            ])

        dl_sns = _sns.Topic(self, 'datalake_sns', display_name='data-lake-sns')

        endpoint_email = configuration['emailSubscriptionList']

        for emails in endpoint_email:
            dl_sns.add_subscription(_subscrption.EmailSubscription(emails))

        #Another way to subscribe: dl_subscription = _sns.Subscription(self,'email-subscrption',topic = dl_sns,endpoint='*****@*****.**',protocol= _sns.SubscriptionProtocol.EMAIL)

        glue_events_target = _events_targets.SnsTopic(dl_sns)

        glue_events_rule = _events.Rule(
            self,
            'gluejobevents-datalake',
            description='Using for tracking the failed glue job of data lake',
            rule_name='dl-gluejob-event',
            event_pattern=_events.EventPattern(
                source=['aws.glue'],
                detail_type=['Glue Job State Change'],
                detail={
                    "jobName": [initial_load_job.name],
                    "state": ["FAILED"]
                }),
            targets=[glue_events_target])

        dms_subscription = _dms.CfnEventSubscription(
            self,
            'dmsevents-datalake',
            sns_topic_arn=dl_sns.topic_arn,
            subscription_name='datalake-dmsevents',
            source_type='replication-task',
            event_categories=['failure'])
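
The stack reads every setting from `./props/config.json`, which is not part of this example. From the keys referenced above, its shape must be roughly the following; all values below are illustrative placeholders, not real credentials or ARNs.

# Illustrative shape of ./props/config.json, expressed as a Python dict (placeholder values only).
example_configuration = {
    "engineName": "mysql",
    "databaseName": "sourcedb",
    "username": "admin",
    "password": "change-me",
    "port": 3306,
    "serverName": "source-db.example.com",
    "tableList": [
        {"schemaName": "sourcedb", "tableName": "orders"},
    ],
    "s3LifecycleRule": {"enabled": True, "expiration": 90},
    "DMS_instance_setting": {"instance_class": "dms.t3.medium", "allocated_storage": 50},
    "glue_job_setting": {"job_capacity": 5, "max_concurrent_run_JobExecution": 3},
    "job_trigger_schedule": "cron(0 1 * * ? *)",
    "emailSubscriptionList": ["you@example.com"],
    "executiveArn": "arn:aws:iam::123456789012:role/your-lake-admin",
}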