def __init__(self, scope: core.Construct, id: str, vpc: aws_ec2.Vpc, **kwargs) -> None:
    """Provision a Redshift cluster inside two dedicated /25 subnets of *vpc*.

    Args:
        scope: Parent CDK construct.
        id: Construct id of this stack/construct.
        vpc: The VPC to place the subnets and cluster in.
    """
    super().__init__(scope, id, **kwargs)

    # One explicit subnet per AZ; the two CIDRs split 10.0.4.0/24 in half.
    redshift_subnets = [
        aws_ec2.Subnet(
            self,
            construct_id,
            availability_zone=az,
            vpc_id=vpc.vpc_id,
            cidr_block=cidr,
        )
        for construct_id, az, cidr in (
            ('sbn-redshift-1', vpc.availability_zones[0], '10.0.4.0/25'),
            ('sbn-redshift-2', vpc.availability_zones[1], '10.0.4.128/25'),
        )
    ]

    subnet_group = aws_redshift.ClusterSubnetGroup(
        self,
        'deta-pipeline-redshift-subnet',
        description='redshift cluster subnet',
        vpc=vpc,
        vpc_subnets=aws_ec2.SubnetSelection(subnets=redshift_subnets),
    )

    # Only a username is supplied here; no password is configured in this code.
    aws_redshift.Cluster(
        self,
        'destination-redshift',
        master_user=aws_redshift.Login(master_username='******'),
        vpc=vpc,
        subnet_group=subnet_group,
    )
def __init__(self, scope: core.Construct, id: str, vpc: VpcStack, **kwargs) -> None:
    """Provision a single-node, encrypted Redshift cluster in the isolated
    subnets of the VPC wrapped by *vpc*.

    Args:
        scope: Parent CDK construct.
        id: Construct id of this stack/construct.
        vpc: VpcStack wrapper exposing `.instance` (the ec2.Vpc) and
            `.redshift_sg` (the cluster security group).

    Exposes:
        self.redshift_secret: Secrets Manager secret holding the generated
            master password.
        self._instance: the created redshift.Cluster.
    """
    super().__init__(scope, id, **kwargs)

    # Place the cluster in the isolated (no internet route) subnets.
    subnet_group = redshift.ClusterSubnetGroup(
        self,
        id="RedshiftSubnetGroup",
        description="Redshift private subnet group",
        vpc=vpc.instance,
        vpc_subnets=ec2.SubnetSelection(subnet_type=ec2.SubnetType.ISOLATED),
    )

    # 32-char generated password; characters Redshift rejects are excluded.
    self.redshift_secret = sm.Secret(
        self,
        "redshift-credentials",
        secret_name="redshift-credentials",
        description="Credentials for Amazon Redshift cluster.",
        generate_secret_string=sm.SecretStringGenerator(
            secret_string_template='{"username": "******"}',
            generate_string_key="password",
            password_length=32,
            exclude_characters='"@\\\/',
            exclude_punctuation=True,
        ),
    )

    redshift_login = redshift.Login(
        master_username="******",
        master_password=self.redshift_secret.secret_value_from_json("password"),
    )

    # Role the cluster assumes for read-only S3 access (e.g. COPY).
    redshift_s3_read_access_role = iam.Role(
        self,
        "redshiftS3AccessRole",
        role_name="redshiftS3AccessRole",
        assumed_by=iam.ServicePrincipal("redshift.amazonaws.com"),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name("AmazonS3ReadOnlyAccess")
        ],
    )

    redshift_cluster = redshift.Cluster(
        self,
        id="redshift-cluster",
        master_user=redshift_login,
        # BUG FIX: the original passed the VpcStack wrapper itself here,
        # while the subnet group above uses the underlying ec2.Vpc via
        # `vpc.instance`. The Cluster construct needs the ec2.Vpc too.
        vpc=vpc.instance,
        cluster_type=redshift.ClusterType.SINGLE_NODE,
        default_database_name="redshift-db",
        encrypted=True,
        node_type=redshift.NodeType.DC2_LARGE,
        port=5439,
        roles=[redshift_s3_read_access_role],
        security_groups=[vpc.redshift_sg],
        subnet_group=subnet_group,
        # DESTROY: this cluster is disposable; data is lost on stack delete.
        removal_policy=core.RemovalPolicy.DESTROY,
    )
    self._instance = redshift_cluster
def __init__(self, scope: core.Construct, id: str, vpc, props, **kwargs) -> None:
    """Provision the Redshift side of an MWAA -> Redshift pipeline.

    Creates the import S3 bucket, an IAM role Redshift assumes to reach S3,
    security-group rules between MWAA and Redshift, two subnet groups, the
    cluster itself, and CfnOutputs for the values the MWAA DAGs need.

    ``props`` keys read here: redshifts3location, mwaadag, mwaa-sg,
    mwaa-vpc-id, redshiftclustername, redshiftusername, redshiftdb.
    """
    super().__init__(scope, id, **kwargs)
    # create s3 bucket that redshift will use. if this bucket exists
    # this cdk app will fail, so ensure this has not been created yet
    redshift_bucket = s3.Bucket(
        self,
        "mwaa-redshift import",
        bucket_name=f"{props['redshifts3location'].lower()}",
        versioned=True,
        block_public_access=s3.BlockPublicAccess.BLOCK_ALL
    )
    # create the files folder in the bucket - this is empty but needed in the DAG
    s3deploy.BucketDeployment(
        self,
        "File",
        sources=[s3deploy.Source.asset("./files")],
        destination_bucket=redshift_bucket,
        destination_key_prefix="files/",
        prune=False,
        retain_on_delete=False
    )
    redshift_bucket_arn = redshift_bucket.bucket_arn
    # get arn of dags bucket - not sure if this is needed so may remove
    dags_bucket = s3.Bucket.from_bucket_name(
        self,
        "mwaa-dag-bucket",
        f"{props['mwaadag'].lower()}"
    )
    dags_bucket_arn = dags_bucket.bucket_arn
    # create redshift secret and redshift user
    # create redshift iam role/policy that we will attach to the RedShift cluster
    # that has the right level of access to a specific S3 bucket
    # you can further lockdown this policy by just specifying s3 actions.
    mwaa_redshift_policy_document = iam.PolicyDocument(
        statements=[
            iam.PolicyStatement(
                actions=[
                    "s3:*"
                ],
                effect=iam.Effect.ALLOW,
                resources=[
                    f"{redshift_bucket_arn}/*",
                    f"{redshift_bucket_arn}",
                    f"{dags_bucket_arn}/*",
                    f"{dags_bucket_arn}",
                ]
            )
        ]
    )
    mwaa_redshift_service_role = iam.Role(
        self,
        "mwaa-redshift-service-role2nd",
        assumed_by=iam.ServicePrincipal("redshift.amazonaws.com"),
        inline_policies={"mwaaRedshiftPolicyDocument": mwaa_redshift_policy_document}
    )
    mwaa_redshift_service_role_arn = mwaa_redshift_service_role.role_arn
    # Setup Security Group
    default_redshift_security_group = ec2.SecurityGroup.from_security_group_id(
        self,
        "MWAARedshiftSG",
        security_group_id=vpc.vpc_default_security_group
    )
    # Allow Redshift traffic (5439) between members of the same security group.
    default_redshift_security_group.add_ingress_rule(
        peer=default_redshift_security_group,
        connection=ec2.Port.tcp(5439)
    )
    # Modify MWAA security group to enable Redshift access
    mwaa_security_group = ec2.SecurityGroup.from_security_group_id(
        self,
        "SG",
        props['mwaa-sg']
        #mutable=False
    )
    # NOTE(review): this opens 5439 to any IPv4 source on the MWAA SG —
    # consider narrowing the peer. Left unchanged (documentation pass only).
    mwaa_security_group.add_ingress_rule(
        ec2.Peer.any_ipv4(),
        ec2.Port.tcp(5439),
        "allow redshift access"
    )
    # create subnet groups - one for RedShift and one for the VPE we will create
    # the VPE subnet will take in parameters we provide that are the subnet-ids
    # of the VPC where MWAA is deployed
    redshift_cluster_subnet_group = redshift.ClusterSubnetGroup(
        self,
        "RedshiftCSG",
        vpc = vpc,
        #vpc_subnets = ec2.SubnetSelection(subnets=vpc.private_subnets),
        vpc_subnets = ec2.SubnetSelection(subnet_type=ec2.SubnetType.PRIVATE),
        description="Redshift Cluster Subnet Group"
    )
    ## get all the subnet ids from the MWAA VPC
    subnet_ids = []
    mwaavpc = ec2.Vpc.from_lookup(
        self,
        "MWAA VPC",
        vpc_id=props['mwaa-vpc-id']
    )
    for subnet in mwaavpc.private_subnets:
        subnet_ids.append(subnet.subnet_id)
    for subnet in mwaavpc.public_subnets:
        subnet_ids.append(subnet.subnet_id)
    vpe_redshift_cluster_subnet_group = redshift.CfnClusterSubnetGroup(
        self,
        "MWAAVPERedshiftCSG",
        subnet_ids = subnet_ids,
        # NOTE(review): this string was split by a hard line break in the
        # original source; reconstructed as a single literal.
        description="MWAA VPE Redshift Cluster Subnet Group"
    )
    redshiftclustername = f"{props['redshiftclustername'].lower()}"
    cluster = redshift.Cluster(
        self,
        "MWAARedshiftCluster",
        master_user=redshift.Login(
            master_username=props['redshiftusername']
        ),
        vpc = vpc,
        security_groups=[default_redshift_security_group],
        node_type=redshift.NodeType.RA3_4XLARGE,
        number_of_nodes=2,
        cluster_name=redshiftclustername,
        default_database_name=props['redshiftdb'],
        removal_policy=core.RemovalPolicy.DESTROY,
        roles=[mwaa_redshift_service_role],
        publicly_accessible=False,
        subnet_group=redshift_cluster_subnet_group
    )
    # No master password is supplied above; `cluster.secret` is presumably the
    # construct-managed credentials secret — surface its ARN below.
    redshift_secret_arn = cluster.secret.secret_arn
    # Display some useful output
    core.CfnOutput(
        self,
        id="RedshiftSecretARN :",
        value=redshift_secret_arn,
        description="This is the Redshift Secret ARN"
    )
    core.CfnOutput(
        self,
        id="RedshiftIAMARN :",
        value=mwaa_redshift_service_role_arn,
        description="This is the Redshift IAM ARN"
    )
    core.CfnOutput(
        self,
        id="RedshiftClusterEndpoint :",
        value=cluster.cluster_endpoint.hostname,
        description="This is the Redshift Cluster Endpoint"
    )
    core.CfnOutput(
        self,
        id="MWAAVPCESG :",
        value=vpe_redshift_cluster_subnet_group.ref,
        description="This is the VPE Subnet Group to use when creating the VPC Endpoint"
    )
    # Ready-to-run CLI command for creating the Redshift-managed VPC endpoint.
    core.CfnOutput(
        self,
        id="redshiftvpcendpointcli",
        value="aws redshift create-endpoint-access --cluster-identifier "+redshiftclustername+" --resource-owner "+self.account+ " --endpoint-name mwaa-redshift-endpoint --subnet-group-name "+vpe_redshift_cluster_subnet_group.ref+" --vpc-security-group-ids "+props['mwaa-sg'],
        description="Use this command to create your vpce"
    )
def _setup_redshift(self) -> None:
    """Stand up the test Redshift cluster plus its IAM, Lake Formation and
    Glue plumbing, and emit connection details as CfnOutputs.

    Reads from self: key, bucket, vpc, db_username, db_password,
    db_password_secret, db_security_group.
    """
    port = 5439
    database = "test"
    schema = "public"
    # Role the cluster assumes; one scoped inline policy per service.
    redshift_role = iam.Role(
        self,
        "aws-data-wrangler-redshift-role",
        assumed_by=iam.ServicePrincipal("redshift.amazonaws.com"),
        inline_policies={
            # Encrypt/decrypt restricted to this stack's KMS key.
            "KMS": iam.PolicyDocument(statements=[
                iam.PolicyStatement(
                    effect=iam.Effect.ALLOW,
                    actions=[
                        "kms:Encrypt",
                        "kms:Decrypt",
                        "kms:GenerateDataKey",
                    ],
                    resources=[self.key.key_arn],
                )
            ]),
            # Read/list/write restricted to this stack's bucket.
            "S3": iam.PolicyDocument(statements=[
                iam.PolicyStatement(
                    effect=iam.Effect.ALLOW,
                    actions=[
                        "s3:Get*",
                        "s3:List*",
                        "s3:Put*",
                    ],
                    resources=[
                        self.bucket.bucket_arn,
                        f"{self.bucket.bucket_arn}/*",
                    ],
                )
            ]),
            "LakeFormation": iam.PolicyDocument(statements=[
                iam.PolicyStatement(
                    effect=iam.Effect.ALLOW,
                    actions=[
                        "lakeformation:GetDataAccess",
                        "lakeformation:GrantPermissions",
                        "lakeformation:GetWorkUnits",
                        "lakeformation:StartQueryPlanning",
                        "lakeformation:GetWorkUnitResults",
                        "lakeformation:GetQueryState",
                    ],
                    resources=["*"],
                )
            ]),
            # Glue catalog read access (plus DeleteTableVersion) on all resources.
            "Glue": iam.PolicyDocument(statements=[
                iam.PolicyStatement(
                    effect=iam.Effect.ALLOW,
                    actions=[
                        "glue:SearchTables",
                        "glue:GetConnections",
                        "glue:GetDataCatalogEncryptionSettings",
                        "glue:GetTables",
                        "glue:GetTableVersions",
                        "glue:GetPartitions",
                        "glue:DeleteTableVersion",
                        "glue:BatchGetPartition",
                        "glue:GetDatabases",
                        "glue:GetTags",
                        "glue:GetTable",
                        "glue:GetDatabase",
                        "glue:GetPartition",
                        "glue:GetTableVersion",
                        "glue:GetConnection",
                        "glue:GetUserDefinedFunction",
                        "glue:GetUserDefinedFunctions",
                    ],
                    resources=["*"],
                )
            ]),
        },
    )
    # Grant the role Lake Formation table permissions on every table in the
    # aws_data_wrangler database.
    lf.CfnPermissions(
        self,
        "CodeBuildTestRoleLFPermissions",
        data_lake_principal=lf.CfnPermissions.DataLakePrincipalProperty(
            data_lake_principal_identifier=redshift_role.role_arn),
        resource=lf.CfnPermissions.ResourceProperty(
            table_resource=lf.CfnPermissions.TableResourceProperty(
                database_name="aws_data_wrangler",
                table_wildcard={},  # type: ignore
            )),
        permissions=[
            "SELECT", "ALTER", "DESCRIBE", "DROP", "DELETE", "INSERT"
        ],
    )
    # NOTE(review): this subnet group is never referenced by the cluster
    # below (which selects PUBLIC subnets via vpc_subnets) — confirm it is
    # still needed.
    redshift.ClusterSubnetGroup(
        self,
        "aws-data-wrangler-redshift-subnet-group",
        description="AWS Data Wrangler Test Arena - Redshift Subnet Group",
        vpc=self.vpc,
        vpc_subnets=ec2.SubnetSelection(subnet_type=ec2.SubnetType.PUBLIC),
    )
    redshift_cluster = redshift.Cluster(
        self,
        "aws-data-wrangler-redshift-cluster",
        default_database_name=database,
        master_user=redshift.Login(
            master_username=self.db_username,
            master_password=self.db_password_secret,
        ),
        cluster_type=redshift.ClusterType.SINGLE_NODE,
        # Publicly reachable test cluster in public subnets.
        publicly_accessible=True,
        port=port,
        vpc=self.vpc,
        vpc_subnets=ec2.SubnetSelection(subnet_type=ec2.SubnetType.PUBLIC),
        security_groups=[self.db_security_group],
        roles=[redshift_role],
    )
    # JDBC connection so Glue can reach the cluster.
    # NOTE(review): the password is embedded in plain connection properties —
    # consider referencing the secret instead. Left unchanged here.
    glue.Connection(
        self,
        "aws-data-wrangler-redshift-glue-connection",
        description="Connect to Redshift.",
        type=glue.ConnectionType.JDBC,
        connection_name="aws-data-wrangler-redshift",
        properties={
            "JDBC_CONNECTION_URL": f"jdbc:redshift://{redshift_cluster.cluster_endpoint.hostname}:{port}/{database}",  # noqa: E501
            "USERNAME": self.db_username,
            "PASSWORD": self.db_password,
        },
        subnet=self.vpc.private_subnets[0],
        security_groups=[self.db_security_group],
    )
    # Secret with the full connection document as its template; the "dummy"
    # generated key exists only because the generator requires one.
    secret = secrets.Secret(
        self,
        "aws-data-wrangler-redshift-secret",
        secret_name="aws-data-wrangler/redshift",
        description="Redshift credentials",
        generate_secret_string=secrets.SecretStringGenerator(
            generate_string_key="dummy",
            secret_string_template=json.dumps({
                "username": self.db_username,
                "password": self.db_password,
                "engine": "redshift",
                "host": redshift_cluster.cluster_endpoint.hostname,
                "port": port,
                "dbClusterIdentifier": redshift_cluster.cluster_name,
            }),
        ),
    )
    # Connection details for test consumers.
    cdk.CfnOutput(self, "RedshiftSecretArn", value=secret.secret_arn)
    cdk.CfnOutput(self, "RedshiftIdentifier", value=redshift_cluster.cluster_name)
    cdk.CfnOutput(
        self,
        "RedshiftAddress",
        value=redshift_cluster.cluster_endpoint.hostname,
    )
    cdk.CfnOutput(self, "RedshiftPort", value=str(port))
    cdk.CfnOutput(self, "RedshiftDatabase", value=database)
    cdk.CfnOutput(self, "RedshiftSchema", value=schema)
    cdk.CfnOutput(self, "RedshiftRole", value=redshift_role.role_arn)