Code example #1
0
    def __init__(
        self,
        scope: core.Construct,
        id: str,
        cluster_name: str,
        **kwargs,
    ) -> None:
        """Create the IAM roles required by the EKS cluster.

        Defines a cluster-admin role assumable by the account root and a
        managed-node-group instance role carrying the standard EKS worker
        policies.

        :param cluster_name: used to tag the admin role per cluster.
        """
        super().__init__(scope, id, **kwargs)

        # EKS admin role: read/describe access to EKS plus the SSM parameter
        # and IAM lookups needed for kubectl access.
        self._clusterAdminRole = iam.Role(
            self, 'clusterAdmin', assumed_by=iam.AccountRootPrincipal())
        self._clusterAdminRole.add_to_policy(
            iam.PolicyStatement(
                resources=["*"],
                actions=[
                    "eks:Describe*", "eks:List*", "eks:AccessKubernetesApi",
                    "ssm:GetParameter", "iam:ListRoles"
                ],
            ))
        # Tag so the role is discoverable per cluster name.
        core.Tags.of(self._clusterAdminRole).add(
            key=f'eks/{cluster_name}/type', value='admin-role')

        # Managed Node Group instance role with the standard AWS-managed
        # policies for EKS workers, CNI, ECR pulls and CloudWatch agent.
        _managed_node_managed_policies = [
            iam.ManagedPolicy.from_aws_managed_policy_name(policy_name)
            for policy_name in (
                'AmazonEKSWorkerNodePolicy',
                'AmazonEKS_CNI_Policy',
                'AmazonEC2ContainerRegistryReadOnly',
                'CloudWatchAgentServerPolicy',
            )
        ]
        self._managed_node_role = iam.Role(
            self,
            'NodeInstance-Role',
            path='/',
            assumed_by=iam.ServicePrincipal('ec2.amazonaws.com'),
            managed_policies=_managed_node_managed_policies,
        )

        # Suppress cfn_nag W12: the admin policy deliberately uses "*" resources.
        scan.suppress_cfnnag_rule(
            'W12', 'by default the role has * resource',
            self._clusterAdminRole.node.find_child(
                'DefaultPolicy').node.default_child)
def add_distribution(scope: core.Construct, id: str, alb_dns_name: str,
                     port: int, logbucket: s3.IBucket) -> str:
    """Front an ALB with a CloudFront distribution and return its domain name.

    :param alb_dns_name: CFN logical ID of the ALB resource; its DNSName and
        SecurityGroups attributes are resolved via Fn::GetAtt at deploy time.
    :param port: HTTP port CloudFront uses to reach the ALB origin.
    :param logbucket: bucket that receives CloudFront access logs.
    :return: the distribution's domain name (a deploy-time token string).
        NOTE: the original annotation said ``cf.IDistribution``, but the
        function has always returned ``distribution_domain_name``.
    """
    # NOTE(review): alb_dns_name doubles as a CFN logical ID here so GetAtt
    # can resolve the real DNS name and security groups — confirm callers
    # pass the logical ID, not a literal DNS name.
    load_balancer_arn = core.Fn.get_att(alb_dns_name, "DNSName")
    security_group_id = core.Fn.get_att(alb_dns_name, "SecurityGroups")

    alb2 = alb.ApplicationLoadBalancer.from_application_load_balancer_attributes(
        scope,
        id,
        load_balancer_arn=load_balancer_arn.to_string(),
        security_group_id=security_group_id.to_string(),
        load_balancer_dns_name=alb_dns_name)
    # HTTP-only origin: TLS terminates at CloudFront, not at the ALB.
    _origin = origins.LoadBalancerV2Origin(
        alb2,
        http_port=port,
        protocol_policy=cf.OriginProtocolPolicy.HTTP_ONLY)
    dist = cf.Distribution(
        scope,
        "CF-" + id,
        default_behavior={
            "origin": _origin,
            "allowed_methods": cf.AllowedMethods.ALLOW_ALL,
            # Caching disabled and all viewer headers forwarded: the origin
            # serves dynamic content (Jupyter/Argo UIs).
            "cache_policy": cf.CachePolicy.CACHING_DISABLED,
            "origin_request_policy": cf.OriginRequestPolicy.ALL_VIEWER,
            "viewer_protocol_policy": cf.ViewerProtocolPolicy.REDIRECT_TO_HTTPS
        },
        minimum_protocol_version=cf.SecurityPolicyProtocol.TLS_V1_2_2019,
        enable_logging=True,
        log_bucket=logbucket)
    # Override Cfn_Nag rule for Cloudfront TLS-1.2 (https://github.com/stelligent/cfn_nag/issues/384)
    scan.suppress_cfnnag_rule(
        'W70',
        'the distribution uses CloudFront domain name and automatically sets the policy to TLSv1',
        dist.node.default_child)

    return dist.distribution_domain_name
    def __init__(
        self,
        scope: core.Construct,
        id: str,
        codebucket: s3.IBucket,
        **kwargs,
    ) -> None:
        """Create an ECR repository and the CI/CD pipeline that builds and
        pushes the Arc docker image to it.

        :param codebucket: S3 bucket that triggers the pipeline and stores
            pipeline artifacts.
        """
        super().__init__(scope, id, **kwargs)

        # 1. Create ECR repository (scan images on push; removed with stack).
        self.ecr_repo = ecr.Repository(
            self,
            'ECRRepo',
            image_scan_on_push=True,
            removal_policy=core.RemovalPolicy.DESTROY)
        # 2. Setup deployment CI/CD to deploy docker image to ECR
        pipeline = codepipeline.Pipeline(self,
                                         "Pipeline",
                                         pipeline_name='BuildArcDockerImage',
                                         artifact_bucket=codebucket)
        # privileged=True is required so CodeBuild can run docker-in-docker.
        image_builder = codebuild.PipelineProject(
            self,
            'DockerBuild',
            project_name='BuildArcDockerImage',
            build_spec=codebuild.BuildSpec.from_source_filename(
                'buildspec.yaml'),
            environment=dict(
                build_image=codebuild.LinuxBuildImage.AMAZON_LINUX_2_3,
                privileged=True),
            environment_variables={
                'REPO_ECR':
                codebuild.BuildEnvironmentVariable(
                    value=self.ecr_repo.repository_uri),
            },
            description='Pipeline for docker build',
            timeout=core.Duration.minutes(60))
        image_builder.apply_removal_policy(core.RemovalPolicy.DESTROY)

        # 3. grant permissions for the CI/CD
        codebucket.grant_read_write(pipeline.role)
        codebucket.grant_read_write(image_builder)
        self.ecr_repo.grant_pull_push(image_builder)

        # Source stage: polling a zip in the code bucket triggers a build.
        source_output = codepipeline.Artifact('src')
        pipeline.add_stage(
            stage_name='Source',
            actions=[
                codepipeline_actions.S3SourceAction(
                    action_name='S3Trigger',
                    bucket=codebucket,
                    bucket_key='app_code/ecr_build_src.zip',
                    output=source_output,
                    trigger=codepipeline_actions.S3Trigger.POLL),
            ])
        pipeline.add_stage(stage_name='Build',
                           actions=[
                               codepipeline_actions.CodeBuildAction(
                                   action_name='DockerImageBuild',
                                   input=source_output,
                                   project=image_builder)
                           ])

        # Suppress cfn_nag on the build role's default policy:
        #   W12 — ecr:GetAuthorizationToken requires a "*" resource;
        #   W76 — the generated policy legitimately exceeds complexity 25.
        # One add_metadata call sets both suppressions; the previous extra
        # scan.suppress_cfnnag_rule('W12', ...) on the same node duplicated
        # (and was superseded by) this metadata, so it was removed.
        image_builder.role.node.find_child(
            'DefaultPolicy'
        ).node.default_child.add_metadata(
            'cfn_nag', {
                "rules_to_suppress": [{
                    "id":
                    "W12",
                    "reason":
                    "the role for action of ecr:GetAuthorizationToken requires * resource"
                }, {
                    "id":
                    "W76",
                    "reason":
                    "the IAM policy is complex, need to be higher than 25"
                }]
            })
    def __init__(self, scope: core.Construct, id: str, eks_cluster: ICluster,
                 login_name: str, code_bucket: str, datalake_bucket: str,
                 **kwargs) -> None:
        """Create Kubernetes namespaces, service accounts, and the IAM
        policies (via IRSA) used by Arc ETL jobs, Jupyter users, and native
        Spark jobs on the given EKS cluster.

        :param eks_cluster: cluster that receives the manifests and SAs.
        :param login_name: Jupyter Hub username; also used as the jupyter
            service-account name.
        :param code_bucket: S3 bucket holding application code.
        :param datalake_bucket: existing data-lake bucket name; when blank
            the code bucket is used instead.
        """
        super().__init__(scope, id, **kwargs)

        # Locate the repo's source/ directory next to the active virtualenv
        # (assumes VIRTUAL_ENV sits inside the project root — TODO confirm).
        source_dir = os.path.split(os.environ['VIRTUAL_ENV'])[0] + '/source'
        # //******************************************************************************************//
        # //************************ SETUP PERMISSION FOR ARC SPARK JOBS ****************************//
        # //******* create k8s namespace, service account, and IAM role for service account ********//
        # //***************************************************************************************//

        # create the 'spark' k8s namespace for ETL jobs
        etl_ns = eks_cluster.add_manifest(
            'SparkNamespace', {
                "apiVersion": "v1",
                "kind": "Namespace",
                "metadata": {
                    "name": "spark",
                    "labels": {
                        "name": "spark"
                    }
                }
            })
        # create the 'jupyter' k8s namespace for notebook pods
        jupyter_ns = eks_cluster.add_manifest(
            'jhubNamespace', {
                "apiVersion": "v1",
                "kind": "Namespace",
                "metadata": {
                    "name": "jupyter",
                    "labels": {
                        "name": "spark"
                    }
                }
            })

        # create k8s service account for Arc ETL jobs
        self._etl_sa = eks_cluster.add_service_account('ETLSa',
                                                       name='arcjob',
                                                       namespace='spark')
        # the namespace must exist before the SA is created inside it
        self._etl_sa.node.add_dependency(etl_ns)

        # RBAC role binding for the ETL service account
        _etl_rb = KubernetesManifest(
            self,
            'ETLRoleBinding',
            cluster=eks_cluster,
            manifest=load_yaml_replace_var_local(
                source_dir + '/app_resources/etl-rbac.yaml',
                fields={"{{MY_SA}}": self._etl_sa.service_account_name},
                multi_resource=True))
        _etl_rb.node.add_dependency(self._etl_sa)

        # service account for the Jupyter Hub single user, named by login
        self._jupyter_sa = eks_cluster.add_service_account('jhubServiceAcct',
                                                           name=login_name,
                                                           namespace='jupyter')
        self._jupyter_sa.node.add_dependency(jupyter_ns)

        # Associate AWS IAM role to K8s Service Account.
        # Fall back to the code bucket when no data-lake bucket was supplied.
        datalake_bucket = code_bucket if not datalake_bucket.strip(
        ) else datalake_bucket
        _bucket_setting = {
            "{{codeBucket}}": code_bucket,
            "{{datalakeBucket}}": datalake_bucket
        }
        # Attach every IAM statement from the template to BOTH the ETL and
        # the Jupyter service accounts (they need the same S3 access).
        _etl_iam = load_yaml_replace_var_local(
            source_dir + '/app_resources/etl-iam-role.yaml',
            fields=_bucket_setting)
        for statmnt in _etl_iam:
            self._etl_sa.add_to_principal_policy(
                iam.PolicyStatement.from_json(statmnt))
            self._jupyter_sa.add_to_principal_policy(
                iam.PolicyStatement.from_json(statmnt))

# # //*************************************************************************************//
# # //******************** SETUP PERMISSION FOR NATIVE SPARK JOBS   **********************//
# # //***********************************************************************************//
        # service account for native (non-Arc) Spark jobs, same namespace
        self._spark_sa = eks_cluster.add_service_account('NativeSparkSa',
                                                         name='nativejob',
                                                         namespace='spark')
        self._spark_sa.node.add_dependency(etl_ns)

        # RBAC role binding for the native Spark service account
        _spark_rb = eks_cluster.add_manifest(
            'sparkRoleBinding',
            load_yaml_replace_var_local(
                source_dir + '/app_resources/native-spark-rbac.yaml',
                fields={"{{MY_SA}}": self._spark_sa.service_account_name}))
        _spark_rb.node.add_dependency(self._spark_sa)

        # IAM statements specific to native Spark jobs
        _native_spark_iam = load_yaml_replace_var_local(
            source_dir + '/app_resources/native-spark-iam-role.yaml',
            fields=_bucket_setting)
        for statmnt in _native_spark_iam:
            self._spark_sa.add_to_principal_policy(
                iam.PolicyStatement.from_json(statmnt))

        # Override Cfn Nag warning W12: IAM policy should not allow * resource
        # (the IRSA roles intentionally carry "*" in some statements).
        scan.suppress_cfnnag_rule(
            'W12', 'by default the etl_sa role has * resource',
            self._etl_sa.role.node.find_child(
                'DefaultPolicy').node.default_child)
        scan.suppress_cfnnag_rule(
            'W12', 'by default the role spark_sa has * resource',
            self._spark_sa.role.node.find_child(
                'DefaultPolicy').node.default_child)
        scan.suppress_cfnnag_rule(
            'W12', 'by default the role jupyter_sa has * resource',
            self._jupyter_sa.role.node.find_child(
                'DefaultPolicy').node.default_child)
    def __init__(self, scope: core.Construct, id: str, eksname: str,
                 solution_id: str, version: str, **kwargs) -> None:
        """Assemble the full SQL-based-ETL-on-EKS stack: S3 app bucket, ECR
        CI/CD pipeline, EKS cluster with IAM/network plumbing, Jupyter Hub
        and Argo Helm charts, optional CloudFront ALB lookups, anonymous
        metrics, and cfn_nag suppressions for the CDK-generated resources.

        :param eksname: name for the EKS cluster and derived resources.
        :param solution_id: AWS Solution ID reported in metrics.
        :param version: solution version reported in metrics.
        """
        super().__init__(scope, id, **kwargs)

        self.template_options.description = "(SO0141) SQL based ETL with Apache Spark on Amazon EKS. This solution provides a SQL based ETL option with a open-source declarative framework powered by Apache Spark."
        # source/ dir resolved next to the active virtualenv — same layout
        # assumption as the other constructs in this file.
        source_dir = os.path.split(os.environ['VIRTUAL_ENV'])[0] + '/source'

        # Cloudformation input params
        datalake_bucket = core.CfnParameter(
            self,
            "datalakebucket",
            type="String",
            description=
            "Your existing S3 bucket to be accessed by Jupyter Notebook and ETL job. Default: blank",
            default="")
        login_name = core.CfnParameter(
            self,
            "jhubuser",
            type="String",
            description="Your username login to jupyter hub",
            default="sparkoneks")

        # Auto-generate a user login in secrets manager, encrypted with a
        # rotating KMS key that is destroyed with the stack.
        key = kms.Key(self,
                      'KMSKey',
                      removal_policy=core.RemovalPolicy.DESTROY,
                      enable_key_rotation=True)
        key.add_alias("alias/secretsManager")
        jhub_secret = secmger.Secret(
            self,
            'jHubPwd',
            generate_secret_string=secmger.SecretStringGenerator(
                exclude_punctuation=True,
                secret_string_template=json.dumps(
                    {'username': login_name.value_as_string}),
                generate_string_key="password"),
            removal_policy=core.RemovalPolicy.DESTROY,
            encryption_key=key)

        # 1. a new bucket to store app code and logs
        self.app_s3 = S3AppCodeConst(self, 'appcode')

        # 2. push docker image to ECR via AWS CICD pipeline
        ecr_image = DockerPipelineConstruct(self, 'image',
                                            self.app_s3.artifact_bucket)
        ecr_image.node.add_dependency(self.app_s3)
        core.CfnOutput(self, 'IMAGE_URI', value=ecr_image.image_uri)

        # 3. EKS base infrastructure: VPC/SGs, IAM roles, cluster, add-ons
        network_sg = NetworkSgConst(self, 'network-sg', eksname,
                                    self.app_s3.code_bucket)
        iam = IamConst(self, 'iam_roles', eksname)
        eks_cluster = EksConst(self, 'eks_cluster', eksname, network_sg.vpc,
                               iam.managed_node_role, iam.admin_role)
        EksSAConst(self, 'eks_sa', eks_cluster.my_cluster, jhub_secret)
        base_app = EksBaseAppConst(self, 'eks_base_app',
                                   eks_cluster.my_cluster)

        # 4. Spark app access control (namespaces, SAs, IRSA policies)
        app_security = SparkOnEksSAConst(self, 'spark_service_account',
                                         eks_cluster.my_cluster,
                                         login_name.value_as_string,
                                         self.app_s3.code_bucket,
                                         datalake_bucket.value_as_string)
        app_security.node.add_dependency(base_app.secret_created)
        # 5. Install Arc Jupyter notebook in EKS via the jupyterhub Helm chart
        jhub_install = eks_cluster.my_cluster.add_helm_chart(
            'JHubChart',
            chart='jupyterhub',
            repository='https://jupyterhub.github.io/helm-chart',
            release='jhub',
            version='0.11.1',
            namespace='jupyter',
            create_namespace=False,
            values=load_yaml_replace_var_local(
                source_dir + '/app_resources/jupyter-values.yaml',
                fields={
                    "{{codeBucket}}": self.app_s3.code_bucket,
                    "{{region}}": core.Aws.REGION
                }))
        jhub_install.node.add_dependency(app_security)
        # EKS get Jupyter login dynamically from secrets manager.
        # Strip the random suffix Secrets Manager appends to the secret name
        # by keeping only the first two '-'-separated parts.
        name_parts = core.Fn.split('-', jhub_secret.secret_name)
        name_no_suffix = core.Fn.join(
            '-',
            [core.Fn.select(0, name_parts),
             core.Fn.select(1, name_parts)])

        config_hub = eks.KubernetesManifest(
            self,
            'JHubConfig',
            cluster=eks_cluster.my_cluster,
            manifest=load_yaml_replace_var_local(
                source_dir + '/app_resources/jupyter-config.yaml',
                fields={
                    "{{MY_SA}}": app_security.jupyter_sa,
                    "{{REGION}}": core.Aws.REGION,
                    "{{SECRET_NAME}}": name_no_suffix
                },
                multi_resource=True))
        config_hub.node.add_dependency(jhub_install)

        # 6. Install ETL orchestrator - Argo in EKS
        # can be replaced by other workflow tool, eg. Airflow
        argo_install = eks_cluster.my_cluster.add_helm_chart(
            'ARGOChart',
            chart='argo-workflows',
            repository='https://argoproj.github.io/argo-helm',
            release='argo',
            version='0.1.4',
            namespace='argo',
            create_namespace=True,
            values=load_yaml_local(source_dir +
                                   '/app_resources/argo-values.yaml'))
        argo_install.node.add_dependency(config_hub)
        # Create argo workflow template for Spark with T-shirt size
        submit_tmpl = eks_cluster.my_cluster.add_manifest(
            'SubmitSparkWrktmpl',
            load_yaml_local(source_dir + '/app_resources/spark-template.yaml'))
        submit_tmpl.node.add_dependency(argo_install)

        # 7. (OPTIONAL) retrieve ALB DNS Name to enable CloudFront in the nested stack.
        # It is used to serve HTTPS requests with its default domain name.
        # Recommend to issue your own TLS certificate, and delete the CF components.
        # NOTE(review): '..status' is kubectl jsonpath recursive descent —
        # presumably intentional; verify against the KubernetesObjectValue docs.
        self._jhub_alb = eks.KubernetesObjectValue(
            self,
            'jhubALB',
            cluster=eks_cluster.my_cluster,
            json_path='..status.loadBalancer.ingress[0].hostname',
            object_type='ingress.networking',
            object_name='jupyterhub',
            object_namespace='jupyter',
            timeout=core.Duration.minutes(10))
        self._jhub_alb.node.add_dependency(config_hub)

        self._argo_alb = eks.KubernetesObjectValue(
            self,
            'argoALB',
            cluster=eks_cluster.my_cluster,
            json_path='..status.loadBalancer.ingress[0].hostname',
            object_type='ingress.networking',
            object_name='argo-argo-workflows-server',
            object_namespace='argo',
            timeout=core.Duration.minutes(10))
        self._argo_alb.node.add_dependency(argo_install)

        # 8. (OPTIONAL) Send solution metrics to AWS
        # turn it off from the CloudFormation mapping section if prefer.
        # NOTE(review): datalake_bucket.value_as_string is a CFN token, which
        # is always truthy at synth time, so the two "True"/"False" ternaries
        # below always take the same branch — confirm the intended logic.
        send_metrics = solution_metrics.SendAnonymousData(
            self,
            "SendMetrics",
            network_sg.vpc,
            self.app_s3.artifact_bucket,
            self.app_s3.s3_deploy_contrust,
            metrics={
                "Solution":
                solution_id,
                "Region":
                core.Aws.REGION,
                "SolutionVersion":
                version,
                "UUID":
                "MY_UUID",
                "UseDataLakeBucket":
                "True" if not datalake_bucket.value_as_string else "False",
                "UseAWSCICD":
                "True" if ecr_image.image_uri else "False",
                "NoAZs":
                len(network_sg.vpc.availability_zones)
            })
        send_metrics.node.add_dependency(self.app_s3.s3_deploy_contrust)

        # 9. (OPTIONAL) Override the cfn Nag rules for AWS Solution CICD deployment
        # remove the section if your CI/CD pipeline doesn't use the cfn_nag utility to validate the CFN.
        # NOTE(review): these child lookups rely on CDK-internal construct IDs
        # (including hashed singleton names) and will break on CDK upgrades.
        k8s_ctl_node = self.node.find_child(
            '@aws-cdk--aws-eks.KubectlProvider')
        cluster_resrc_node = self.node.find_child(
            '@aws-cdk--aws-eks.ClusterResourceProvider')
        scan.suppress_cfnnag_rule(
            'W12', 'by default the role has * resource',
            self.node.find_child('eks_cluster').node.find_child('EKS').node.
            default_child.node.find_child('CreationRole').node.find_child(
                'DefaultPolicy').node.default_child)
        scan.suppress_cfnnag_rule(
            'W11', 'by default the role has * resource',
            self.node.find_child(
                'Custom::AWSCDKOpenIdConnectProviderCustomResourceProvider').
            node.find_child('Role'))
        scan.suppress_lambda_cfnnag_rule(
            k8s_ctl_node.node.find_child('Handler').node.default_child)
        scan.suppress_lambda_cfnnag_rule(
            k8s_ctl_node.node.find_child('Provider').node.find_child(
                'framework-onEvent').node.default_child)
        scan.suppress_lambda_cfnnag_rule(
            self.node.find_child(
                'Custom::CDKBucketDeployment8693BB64968944B69AAFB0CC9EB8756C').
            node.default_child)
        # scan.suppress_lambda_cfnnag_rule(self.node.find_child('Custom::S3AutoDeleteObjectsCustomResourceProvider').node.find_child('Handler'))
        scan.suppress_lambda_cfnnag_rule(
            self.node.find_child(
                'Custom::AWSCDKOpenIdConnectProviderCustomResourceProvider').
            node.find_child('Handler'))
        scan.suppress_lambda_cfnnag_rule(
            self.node.find_child('AWSCDKCfnUtilsProviderCustomResourceProvider'
                                 ).node.find_child('Handler'))
        scan.suppress_lambda_cfnnag_rule(
            cluster_resrc_node.node.find_child(
                'OnEventHandler').node.default_child)
        scan.suppress_lambda_cfnnag_rule(
            cluster_resrc_node.node.find_child(
                'IsCompleteHandler').node.default_child)
        scan.suppress_lambda_cfnnag_rule(
            cluster_resrc_node.node.find_child('Provider').node.find_child(
                'framework-isComplete').node.default_child)
        scan.suppress_lambda_cfnnag_rule(
            cluster_resrc_node.node.find_child('Provider').node.find_child(
                'framework-onTimeout').node.default_child)
        scan.suppress_lambda_cfnnag_rule(
            cluster_resrc_node.node.find_child('Provider').node.find_child(
                'framework-onEvent').node.default_child)
        scan.suppress_network_cfnnag_rule(
            self.node.find_child('eks_cluster').node.find_child('EKS').node.
            find_child('ControlPlaneSecurityGroup').node.default_child)
        scan.suppress_lambda_cfnnag_rule(
            self.node.find_child('SendMetrics').node.find_child(
                'LambdaProvider').node.find_child(
                    'framework-onEvent').node.default_child)
        scan.suppress_network_cfnnag_rule(
            self.node.find_child('SendMetrics').node.find_child(
                'LambdaProvider').node.find_child('framework-onEvent').node.
            find_child('SecurityGroup').node.default_child)
        scan.suppress_lambda_cfnnag_rule(
            self.node.find_child(
                'SingletonLambda75248a819138468c9ba1bca6c7137599').node.
            default_child)
        scan.suppress_network_cfnnag_rule(
            self.node.find_child(
                'SingletonLambda75248a819138468c9ba1bca6c7137599').node.
            find_child('SecurityGroup').node.default_child)
    def __init__(self, scope: core.Construct, id: str, eksname: str,
                 codebucket: str, **kwargs) -> None:
        """Provision the VPC, flow logging, and VPC endpoints for the EKS stack.

        :param eksname: cluster name used to tag the VPC.
        :param codebucket: existing bucket name that receives VPC flow logs.
        """
        super().__init__(scope, id, **kwargs)

        # //*************************************************//
        # //******************* NETWORK ********************//
        # //************************************************//
        # VPC spanning two AZs with a single shared NAT gateway.
        self._vpc = ec2.Vpc(self, 'eksVpc', max_azs=2, nat_gateways=1)
        core.Tags.of(self._vpc).add('Name', eksname + 'EksVpc')

        # Ship REJECT-only flow logs into the shared code bucket.
        self._log_bucket = s3.Bucket.from_bucket_name(
            self, 'vpc_logbucket', codebucket)
        self._vpc.add_flow_log(
            "FlowLogCloudWatch",
            destination=ec2.FlowLogDestination.to_s3(self._log_bucket,
                                                     'vpcRejectlog/'),
            traffic_type=ec2.FlowLogTrafficType.REJECT)

        # One security group shared by every interface endpoint: HTTPS from
        # anywhere inside the VPC CIDR.
        self._vpc_endpoint_sg = ec2.SecurityGroup(
            self,
            'EndpointSg',
            vpc=self._vpc,
            description='Security Group for Endpoint',
        )
        self._vpc_endpoint_sg.add_ingress_rule(
            ec2.Peer.ipv4(self._vpc.vpc_cidr_block), ec2.Port.tcp(port=443))
        core.Tags.of(self._vpc_endpoint_sg).add('Name',
                                                'SparkOnEKS-VPCEndpointSg')

        # S3 gateway endpoint, routed from both subnet tiers.
        self._vpc.add_gateway_endpoint(
            "S3GatewayEndpoint",
            service=ec2.GatewayVpcEndpointAwsService.S3,
            subnets=[
                ec2.SubnetSelection(subnet_type=ec2.SubnetType.PUBLIC),
                ec2.SubnetSelection(subnet_type=ec2.SubnetType.PRIVATE)
            ])

        # Interface endpoints so private workloads reach these services
        # without traversing the NAT gateway.
        endpoint_specs = (
            ("EcrDockerEndpoint",
             ec2.InterfaceVpcEndpointAwsService.ECR_DOCKER),
            ("CWLogsEndpoint",
             ec2.InterfaceVpcEndpointAwsService.CLOUDWATCH_LOGS),
            ("AthenaEndpoint", ec2.InterfaceVpcEndpointAwsService.ATHENA),
            ("KMSEndpoint", ec2.InterfaceVpcEndpointAwsService.KMS),
        )
        for endpoint_id, endpoint_service in endpoint_specs:
            self._vpc.add_interface_endpoint(
                endpoint_id,
                service=endpoint_service,
                security_groups=[self._vpc_endpoint_sg])

        # cfn_nag W33: the public subnets intentionally permit internet
        # ingress — a public-facing ALB is part of the design.
        for public_subnet in self._vpc.public_subnets:
            scan.suppress_cfnnag_rule(
                'W33',
                'a public facing ALB is required and ingress from the internet should be permitted.',
                public_subnet.node.default_child)

        # cfn_nag W40/W5: the default wide-open egress on the endpoint SG
        # is acceptable here.
        self._vpc_endpoint_sg.node.default_child.add_metadata(
            'cfn_nag', {
                "rules_to_suppress": [{
                    "id": "W40",
                    "reason":
                    "Egress IP Protocol of -1 is default and generally considered OK"
                }, {
                    "id": "W5",
                    "reason": "Security Groups with cidr open considered OK"
                }]
            })