Example 1
    # Assumed CDK v1 imports for this snippet:
    #   from aws_cdk import core, aws_ec2, aws_redshift
    def __init__(self, scope: core.Construct, id: str, vpc: aws_ec2.Vpc,
                 **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

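        # Carve two dedicated /25 subnets in different AZs for the cluster subnet group.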
        _subnets = []
        _subnets.append(
            aws_ec2.Subnet(self,
                           'sbn-redshift-1',
                           availability_zone=vpc.availability_zones[0],
                           vpc_id=vpc.vpc_id,
                           cidr_block='10.0.4.0/25'))

        _subnets.append(
            aws_ec2.Subnet(self,
                           'sbn-redshift-2',
                           availability_zone=vpc.availability_zones[1],
                           vpc_id=vpc.vpc_id,
                           cidr_block='10.0.4.128/25'))

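        # Group the subnets so Redshift knows where it may place the cluster.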
        _cluster_subnet_group = aws_redshift.ClusterSubnetGroup(
            self,
            'deta-pipeline-redshift-subnet',
            description='redshift cluster subnet',
            vpc=vpc,
            vpc_subnets=aws_ec2.SubnetSelection(subnets=_subnets))

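        # No master_password is supplied, so CDK generates one in Secrets Manager.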
        aws_redshift.Cluster(
            self,
            'destination-redshift',
            master_user=aws_redshift.Login(master_username='******'),
            vpc=vpc,
            subnet_group=_cluster_subnet_group)
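
For context, here is a minimal sketch of how a snippet like this could be wired into a CDK app. The NetworkStack class, the app scaffolding, and the RedshiftStack name are assumptions for illustration, not part of the example:

from aws_cdk import core, aws_ec2

class NetworkStack(core.Stack):
    # Hypothetical VPC stack supplying the ec2.Vpc the example expects.
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)
        # A single public tier of /24s leaves 10.0.4.0/25 and 10.0.4.128/25
        # free for the manually carved Redshift subnets above.
        self.vpc = aws_ec2.Vpc(
            self,
            'vpc',
            cidr='10.0.0.0/16',
            max_azs=2,
            subnet_configuration=[
                aws_ec2.SubnetConfiguration(
                    name='public',
                    subnet_type=aws_ec2.SubnetType.PUBLIC,
                    cidr_mask=24)
            ])

app = core.App()
network = NetworkStack(app, 'network')
# RedshiftStack stands in for the class whose __init__ is shown above.
RedshiftStack(app, 'redshift', vpc=network.vpc)
app.synth()
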
Example 2
    # Assumed CDK v1 imports for this snippet:
    #   from aws_cdk import core, aws_ec2 as ec2, aws_iam as iam,
    #   aws_redshift as redshift, aws_secretsmanager as sm
    # VpcStack is a project-specific construct; see the sketch after this example.
    def __init__(self, scope: core.Construct, id: str, vpc: VpcStack,
                 **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

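        # Keep the cluster in the VPC's isolated subnets (no internet route).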
        subnet_group = redshift.ClusterSubnetGroup(
            self,
            id="RedshiftSubnetGroup",
            description="Redshift private subnet group",
            vpc=vpc.instance,
            vpc_subnets=ec2.SubnetSelection(
                subnet_type=ec2.SubnetType.ISOLATED),
        )

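        # Generate the cluster credentials: a fixed username in the template
        # plus a random 32-character password.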
        self.redshift_secret = sm.Secret(
            self,
            "redshift-credentials",
            secret_name="redshift-credentials",
            description="Credentials for Amazon Redshift cluster.",
            generate_secret_string=sm.SecretStringGenerator(
                secret_string_template='{"username": "******"}',
                generate_string_key="password",
                password_length=32,
                exclude_characters='"@\\/',
                exclude_punctuation=True,
            ),
        )

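        # Combine the username with the generated password read back from the secret.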
        redshift_login = redshift.Login(
            master_username="******",
            master_password=self.redshift_secret.secret_value_from_json(
                "password"),
        )

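        # Role the cluster can assume to read from S3 (e.g. for COPY).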
        redshift_s3_read_access_role = iam.Role(
            self,
            "redshiftS3AccessRole",
            role_name="redshiftS3AccessRole",
            assumed_by=iam.ServicePrincipal("redshift.amazonaws.com"),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    "AmazonS3ReadOnlyAccess")
            ],
        )

        redshift_cluster = redshift.Cluster(
            self,
            id="redshift-cluster",
            master_user=redshift_login,
            vpc=vpc.instance,  # the underlying ec2.Vpc, as with the subnet group above
            cluster_type=redshift.ClusterType.SINGLE_NODE,
            default_database_name="redshift-db",
            encrypted=True,
            node_type=redshift.NodeType.DC2_LARGE,
            port=5439,
            roles=[redshift_s3_read_access_role],
            security_groups=[vpc.redshift_sg],
            subnet_group=subnet_group,
            removal_policy=core.RemovalPolicy.DESTROY,
        )
        self._instance = redshift_cluster
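
The VpcStack type above is project-specific, not a CDK class. A plausible minimal reconstruction, assuming it only needs to expose the underlying ec2.Vpc as .instance and a security group as .redshift_sg:

from aws_cdk import core, aws_ec2 as ec2

class VpcStack(core.Stack):
    # Hypothetical sketch; only .instance and .redshift_sg are used by the example.
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)
        self.instance = ec2.Vpc(
            self,
            "vpc",
            subnet_configuration=[
                ec2.SubnetConfiguration(
                    name="public", subnet_type=ec2.SubnetType.PUBLIC),
                # ISOLATED subnets have no internet route, matching the
                # SubnetSelection used for the cluster subnet group.
                ec2.SubnetConfiguration(
                    name="isolated", subnet_type=ec2.SubnetType.ISOLATED),
            ],
        )
        self.redshift_sg = ec2.SecurityGroup(self, "redshift-sg", vpc=self.instance)
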
Example 3
    # Assumed CDK v1 imports for this snippet:
    #   from aws_cdk import core, aws_ec2 as ec2, aws_iam as iam,
    #   aws_redshift as redshift, aws_s3 as s3, aws_s3_deployment as s3deploy
    def __init__(self, scope: core.Construct, id: str, vpc, props, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # Create the S3 bucket that Redshift will use. If a bucket with this
        # name already exists, the CDK app will fail, so make sure it has not
        # been created yet.

        redshift_bucket = s3.Bucket(
            self,
            "mwaa-redshift import",
            bucket_name=f"{props['redshifts3location'].lower()}",
            versioned=True,
            block_public_access=s3.BlockPublicAccess.BLOCK_ALL
        )

        # Create the files/ folder in the bucket - it is empty, but the DAG
        # expects it to exist.

        s3deploy.BucketDeployment(
            self,
            "File",
            sources=[s3deploy.Source.asset("./files")],
            destination_bucket=redshift_bucket,
            destination_key_prefix="files/",
            prune=False,
            retain_on_delete=False
        )

        redshift_bucket_arn = redshift_bucket.bucket_arn

        # get arn of dags bucket - not sure if this is needed so may remove
        
        dags_bucket = s3.Bucket.from_bucket_name(self, "mwaa-dag-bucket", f"{props['mwaadag'].lower()}")
        dags_bucket_arn = dags_bucket.bucket_arn

        # The Redshift admin secret is generated automatically by the Cluster
        # construct below (no master_password is supplied); its ARN is surfaced
        # as a stack output at the end.

        # Create the Redshift IAM role/policy that we will attach to the Redshift
        # cluster, giving it the right level of access to the specific S3 buckets.
        # You can lock this policy down further by listing only the S3 actions you need.

        mwaa_redshift_policy_document = iam.PolicyDocument(
            statements=[
                iam.PolicyStatement(
                    actions=[
                        "s3:*"
                    ],
                    effect=iam.Effect.ALLOW,
                    resources=[
                        f"{redshift_bucket_arn}/*",
                        f"{redshift_bucket_arn}",
                        f"{dags_bucket_arn}/*",
                        f"{dags_bucket_arn}",
                        ]
                )
            ]
        )

        mwaa_redshift_service_role = iam.Role(
            self,
            "mwaa-redshift-service-role2nd",
            assumed_by=iam.ServicePrincipal("redshift.amazonaws.com"),
            inline_policies={"mwaaRedshiftPolicyDocument": mwaa_redshift_policy_document}
        )

        mwaa_redshift_service_role_arn = mwaa_redshift_service_role.role_arn

        # Setup Security Group

        default_redshift_security_group = ec2.SecurityGroup.from_security_group_id(
            self,
            "MWAARedshiftSG",
            security_group_id=vpc.vpc_default_security_group
            )

        default_redshift_security_group.add_ingress_rule(
            peer=default_redshift_security_group,
            connection=ec2.Port.tcp(5439)
            )

        # Modify MWAA security group to enable Redshift access

        mwaa_security_group = ec2.SecurityGroup.from_security_group_id(
            self,
            "SG",
            props['mwaa-sg']
            #mutable=False
            )
        mwaa_security_group.add_ingress_rule(ec2.Peer.any_ipv4(), ec2.Port.tcp(5439), "allow redshift access")
        

        # Create two subnet groups - one for Redshift and one for the VPC
        # endpoint (VPE) we will create. The VPE subnet group takes the subnet
        # ids of the VPC where MWAA is deployed, passed in via props.

        redshift_cluster_subnet_group = redshift.ClusterSubnetGroup(
            self,
            "RedshiftCSG",
            vpc=vpc,
            # vpc_subnets=ec2.SubnetSelection(subnets=vpc.private_subnets),
            vpc_subnets=ec2.SubnetSelection(subnet_type=ec2.SubnetType.PRIVATE),
            description="Redshift Cluster Subnet Group"
        )

        # Get all the subnet ids from the MWAA VPC.

        subnet_ids = []
        mwaavpc = ec2.Vpc.from_lookup(
            self,
            "MWAA VPC",
            vpc_id=props['mwaa-vpc-id']
        )
        for subnet in mwaavpc.private_subnets:
            subnet_ids.append(subnet.subnet_id)
        for subnet in mwaavpc.public_subnets:
            subnet_ids.append(subnet.subnet_id)
        
        vpe_redshift_cluster_subnet_group = redshift.CfnClusterSubnetGroup(
            self,
            "MWAAVPERedshiftCSG",
            subnet_ids=subnet_ids,
            description="MWAA VPE Redshift Cluster Subnet Group"
        )

        redshiftclustername = f"{props['redshiftclustername'].lower()}"

        cluster = redshift.Cluster(
            self,
            "MWAARedshiftCluster",
            master_user=redshift.Login(
                master_username=props['redshiftusername']
            ),
            vpc=vpc,
            security_groups=[default_redshift_security_group],
            node_type=redshift.NodeType.RA3_4XLARGE,
            number_of_nodes=2,
            cluster_name=redshiftclustername,
            default_database_name=props['redshiftdb'],
            removal_policy=core.RemovalPolicy.DESTROY,
            roles=[mwaa_redshift_service_role],
            publicly_accessible=False,
            subnet_group=redshift_cluster_subnet_group
        )

        redshift_secret_arn = cluster.secret.secret_arn
        

        # Display some useful output

        core.CfnOutput(
            self,
            id="RedshiftSecretARN",
            value=redshift_secret_arn,
            description="This is the Redshift Secret ARN"
        )

        core.CfnOutput(
            self,
            id="RedshiftIAMARN",
            value=mwaa_redshift_service_role_arn,
            description="This is the Redshift IAM ARN"
        )

        core.CfnOutput(
            self,
            id="RedshiftClusterEndpoint",
            value=cluster.cluster_endpoint.hostname,
            description="This is the Redshift Cluster Endpoint"
        )
        core.CfnOutput(
            self,
            id="MWAAVPCESG",
            value=vpe_redshift_cluster_subnet_group.ref,
            description="This is the VPE Subnet Group to use when creating the VPC Endpoint"
        )
        core.CfnOutput(
            self,
            id="redshiftvpcendpointcli",
            value="aws redshift create-endpoint-access "
                  f"--cluster-identifier {redshiftclustername} "
                  f"--resource-owner {self.account} "
                  "--endpoint-name mwaa-redshift-endpoint "
                  f"--subnet-group-name {vpe_redshift_cluster_subnet_group.ref} "
                  f"--vpc-security-group-ids {props['mwaa-sg']}",
            description="Use this command to create your vpce"
        )
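
The props mapping read throughout this constructor is not shown in the example. Judging from the keys the code accesses, it would look roughly like the following (all values are placeholders), and because of ec2.Vpc.from_lookup the stack must be created with an explicit env:

props = {
    'redshifts3location': 'my-redshift-import-bucket',  # placeholder bucket name
    'mwaadag': 'my-mwaa-dag-bucket',                    # placeholder MWAA DAG bucket
    'mwaa-sg': 'sg-0123456789abcdef0',                  # placeholder MWAA security group id
    'mwaa-vpc-id': 'vpc-0123456789abcdef0',             # placeholder MWAA VPC id
    'redshiftclustername': 'mwaa-redshift-cluster',
    'redshiftusername': 'awsuser',
    'redshiftdb': 'mwaa',
}
# Hypothetical instantiation ('app' and 'vpc' as in a typical CDK entry
# point; the stack class name is an assumption):
MwaaRedshiftStack(app, 'mwaa-redshift', vpc=vpc, props=props,
                  env=core.Environment(account='111122223333', region='eu-west-1'))
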
Example 4
 # Assumed CDK v1 imports for this snippet (a method on a larger test-infrastructure stack):
 #   import json
 #   from aws_cdk import core as cdk, aws_ec2 as ec2, aws_glue as glue, aws_iam as iam,
 #   aws_lakeformation as lf, aws_redshift as redshift, aws_secretsmanager as secrets
 def _setup_redshift(self) -> None:
     port = 5439
     database = "test"
     schema = "public"
     redshift_role = iam.Role(
         self,
         "aws-data-wrangler-redshift-role",
         assumed_by=iam.ServicePrincipal("redshift.amazonaws.com"),
         inline_policies={
             "KMS":
             iam.PolicyDocument(statements=[
                 iam.PolicyStatement(
                     effect=iam.Effect.ALLOW,
                     actions=[
                         "kms:Encrypt",
                         "kms:Decrypt",
                         "kms:GenerateDataKey",
                     ],
                     resources=[self.key.key_arn],
                 )
             ]),
             "S3":
             iam.PolicyDocument(statements=[
                 iam.PolicyStatement(
                     effect=iam.Effect.ALLOW,
                     actions=[
                         "s3:Get*",
                         "s3:List*",
                         "s3:Put*",
                     ],
                     resources=[
                         self.bucket.bucket_arn,
                         f"{self.bucket.bucket_arn}/*",
                     ],
                 )
             ]),
             "LakeFormation":
             iam.PolicyDocument(statements=[
                 iam.PolicyStatement(
                     effect=iam.Effect.ALLOW,
                     actions=[
                         "lakeformation:GetDataAccess",
                         "lakeformation:GrantPermissions",
                         "lakeformation:GetWorkUnits",
                         "lakeformation:StartQueryPlanning",
                         "lakeformation:GetWorkUnitResults",
                         "lakeformation:GetQueryState",
                     ],
                     resources=["*"],
                 )
             ]),
             "Glue":
             iam.PolicyDocument(statements=[
                 iam.PolicyStatement(
                     effect=iam.Effect.ALLOW,
                     actions=[
                         "glue:SearchTables",
                         "glue:GetConnections",
                         "glue:GetDataCatalogEncryptionSettings",
                         "glue:GetTables",
                         "glue:GetTableVersions",
                         "glue:GetPartitions",
                         "glue:DeleteTableVersion",
                         "glue:BatchGetPartition",
                         "glue:GetDatabases",
                         "glue:GetTags",
                         "glue:GetTable",
                         "glue:GetDatabase",
                         "glue:GetPartition",
                         "glue:GetTableVersion",
                         "glue:GetConnection",
                         "glue:GetUserDefinedFunction",
                         "glue:GetUserDefinedFunctions",
                     ],
                     resources=["*"],
                 )
             ]),
         },
     )
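     # Grant the role Lake Formation permissions on all tables in the
     # aws_data_wrangler database.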
     lf.CfnPermissions(
         self,
         "CodeBuildTestRoleLFPermissions",
         data_lake_principal=lf.CfnPermissions.DataLakePrincipalProperty(
             data_lake_principal_identifier=redshift_role.role_arn),
         resource=lf.CfnPermissions.ResourceProperty(
             table_resource=lf.CfnPermissions.TableResourceProperty(
                 database_name="aws_data_wrangler",
                 table_wildcard={},  # type: ignore
             )),
         permissions=[
             "SELECT", "ALTER", "DESCRIBE", "DROP", "DELETE", "INSERT"
         ],
     )
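     # Public subnet group; the test cluster below is publicly accessible.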
     redshift.ClusterSubnetGroup(
         self,
         "aws-data-wrangler-redshift-subnet-group",
         description="AWS Data Wrangler Test Arena - Redshift Subnet Group",
         vpc=self.vpc,
         vpc_subnets=ec2.SubnetSelection(subnet_type=ec2.SubnetType.PUBLIC),
     )
     redshift_cluster = redshift.Cluster(
         self,
         "aws-data-wrangler-redshift-cluster",
         default_database_name=database,
         master_user=redshift.Login(
             master_username=self.db_username,
             master_password=self.db_password_secret,
         ),
         cluster_type=redshift.ClusterType.SINGLE_NODE,
         publicly_accessible=True,
         port=port,
         vpc=self.vpc,
         vpc_subnets=ec2.SubnetSelection(subnet_type=ec2.SubnetType.PUBLIC),
         security_groups=[self.db_security_group],
         roles=[redshift_role],
     )
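     # Glue JDBC connection pointing at the cluster endpoint.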
     glue.Connection(
         self,
         "aws-data-wrangler-redshift-glue-connection",
         description="Connect to Redshift.",
         type=glue.ConnectionType.JDBC,
         connection_name="aws-data-wrangler-redshift",
         properties={
             "JDBC_CONNECTION_URL":
             f"jdbc:redshift://{redshift_cluster.cluster_endpoint.hostname}:{port}/{database}",  # noqa: E501
             "USERNAME": self.db_username,
             "PASSWORD": self.db_password,
         },
         subnet=self.vpc.private_subnets[0],
         security_groups=[self.db_security_group],
     )
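     # Store the connection details in Secrets Manager using the standard
     # Redshift secret layout.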
     secret = secrets.Secret(
         self,
         "aws-data-wrangler-redshift-secret",
         secret_name="aws-data-wrangler/redshift",
         description="Redshift credentials",
         generate_secret_string=secrets.SecretStringGenerator(
             generate_string_key="dummy",
              secret_string_template=json.dumps({
                  "username": self.db_username,
                  "password": self.db_password,
                  "engine": "redshift",
                  "host": redshift_cluster.cluster_endpoint.hostname,
                  "port": port,
                  "dbClusterIdentifier": redshift_cluster.cluster_name,
              }),
         ),
     )
     cdk.CfnOutput(self, "RedshiftSecretArn", value=secret.secret_arn)
     cdk.CfnOutput(self,
                   "RedshiftIdentifier",
                   value=redshift_cluster.cluster_name)
     cdk.CfnOutput(
         self,
         "RedshiftAddress",
         value=redshift_cluster.cluster_endpoint.hostname,
     )
     cdk.CfnOutput(self, "RedshiftPort", value=str(port))
     cdk.CfnOutput(self, "RedshiftDatabase", value=database)
     cdk.CfnOutput(self, "RedshiftSchema", value=schema)
     cdk.CfnOutput(self, "RedshiftRole", value=redshift_role.role_arn)