def _create_bucket(self, name, origin_access_identity):
    """Create a private S3 bucket with its access block and bucket policy.

    Three resources are declared, each with ``self`` as Pulumi parent:

    * the bucket itself (``acl='private'``, tagged with ``self.tags``),
    * a public access block with all four restrictions enabled,
    * a bucket policy whose document is produced by
      ``self._get_s3_policy`` once the identity's IAM ARN and the bucket
      ARN have both resolved.

    Args:
        name: Suffix appended to ``website-{self.name}-{self.stack}-`` to
            form the logical resource names.
        origin_access_identity: Object exposing an ``iam_arn`` output that
            is passed to the policy builder.

    Returns:
        The created ``s3.Bucket``.
    """
    resource_name = f'website-{self.name}-{self.stack}-{name}'

    bucket = s3.Bucket(
        resource_name,
        acl='private',
        tags=self.tags,
        opts=pulumi.ResourceOptions(parent=self),
    )

    # The access block shares the bucket's logical name.
    s3.BucketPublicAccessBlock(
        resource_name,
        bucket=bucket.id,
        block_public_acls=True,
        block_public_policy=True,
        ignore_public_acls=True,
        restrict_public_buckets=True,
        opts=pulumi.ResourceOptions(parent=self),
    )

    # _get_s3_policy receives the resolved values as [iam_arn, bucket_arn].
    policy_document = pulumi.Output.all(
        origin_access_identity.iam_arn,
        bucket.arn,
    ).apply(self._get_s3_policy)

    s3.BucketPolicy(
        f'{resource_name}-policy',
        bucket=bucket.id,
        policy=policy_document,
        opts=pulumi.ResourceOptions(parent=self),
    )

    return bucket
} }] }) # apply policy to bucket bucket_name = bucket.id bucket_policy = s3.BucketPolicy( "bucket-policy", bucket=bucket_name, policy=bucket_name.apply(bucket_policy_cloudtrial)) # s3 Block Public access s3Access = s3.BucketPublicAccessBlock("Block Public access", block_public_acls=True, block_public_policy=True, ignore_public_acls=True, restrict_public_buckets=True, bucket=bucket.id) # Create cloudwatch log cloudwatch_log = cloudwatch.LogGroup('LogGroup', name='S3BucketActivity') # add log stream to cloudwatch log log_stream = cloudwatch.LogStream("CloudWatch_log_stream", log_group_name=cloudwatch_log.name, name='818155059458_CloudTrail_eu-west-2') # configure cloudwatch metrics cloudwatch_metrics = cloudwatch.LogMetricFilter( 'Metrics', log_group_name=cloudwatch_log.name,
"""Declare the per-stack Nextcloud data bucket with public access blocked."""

import pulumi
from pulumi_aws import s3

# The current stack name is embedded in every logical resource name below.
env = pulumi.get_stack()

bucket = s3.Bucket(f"nextcloud-bucket-{env}")

# Enable all four public-access restrictions on the bucket, see
# https://docs.aws.amazon.com/AmazonS3/latest/dev/access-control-block-public-access.html
s3.BucketPublicAccessBlock(
    f"nextcloud-bucket-public-block-{env}",
    bucket=bucket.id,
    block_public_acls=True,
    block_public_policy=True,
    ignore_public_acls=True,
    restrict_public_buckets=True,
)

# Stack exports of the bucket name and ARN are currently disabled:
# pulumi.export('bucket_name', bucket.id)
# pulumi.export('bucket_arn', bucket.arn)
}) # apply policy to bucket bucket_name = bucket.id bucket_policy = s3.BucketPolicy( "bucket-policy", opts=ResourceOptions(depends_on=[bucket]), bucket=bucket_name, policy=bucket_name.apply(bucket_policy_cloudtrial)) # s3 Block Public access s3Access = s3.BucketPublicAccessBlock( "Block Public access", opts=ResourceOptions(depends_on=[bucket_policy]), block_public_acls=True, block_public_policy=True, ignore_public_acls=True, restrict_public_buckets=True, bucket=bucket.id) # Create cloudwatch log cloudwatch_log = cloudwatch.LogGroup('LogGroup', name='S3BucketActivity') # add log stream to cloudwatch log log_stream = cloudwatch.LogStream("CloudWatch_log_stream", log_group_name=cloudwatch_log.name, name=f"{account_id}_CloudTrail_{region}") # configure cloudwatch metrics cloudwatch_metrics = cloudwatch.LogMetricFilter( 'Metrics',
import pulumi
from pulumi import Output, ResourceOptions, export
from pulumi_aws import s3, ec2

# Destination bucket for VPC flow logs. force_destroy=True is set on the
# bucket so it can be destroyed even when it still contains objects.
bucket_vpc_and_subnets = s3.Bucket(
    'vpc-and-subnet-flow-logs',
    force_destroy=True,
)

# Enable all four public-access restrictions on the log bucket.
s3.BucketPublicAccessBlock(
    "Block_Public_access",
    bucket=bucket_vpc_and_subnets.id,
    block_public_acls=True,
    block_public_policy=True,
    ignore_public_acls=True,
    restrict_public_buckets=True,
)

# IDs of the VPCs returned by the ec2.get_vpcs() lookup.
vpcs = ec2.get_vpcs().ids

# One flow log per VPC, capturing ALL traffic and delivering to the bucket.
for vpc in vpcs:
    ec2.FlowLog(
        f"flowlogs_{vpc}",
        vpc_id=vpc,
        traffic_type='ALL',
        log_destination_type='s3',
        log_destination=bucket_vpc_and_subnets.arn,
        opts=ResourceOptions(depends_on=[bucket_vpc_and_subnets]),
    )
def __init__(self,
             name,
             tags: Dict[str, str] = None,
             opts: pulumi.ResourceOptions = None):
    """Declare the datalake infrastructure as children of this component.

    Resources created (all with ``parent=self``):

    * a KMS key and an ``alias/hca/{name}`` alias for it,
    * three versioned, KMS-encrypted buckets (datalake, fileproc, scripts),
      each with a bucket policy and a public access block,
    * IAM policies for reading pii / confidential / nonsensitive data,
      for KMS full usage and encrypt-only, and for fetching scripts,
    * a lookup of the AWS-managed ``AWSGlueServiceRole`` policy,
    * a Glue security configuration named ``name``.

    Args:
        name: Component name; also the prefix of every child resource name.
        tags: Tags applied to the KMS key and the buckets. ``None`` means
            no tags.
        opts: Pulumi resource options for the component itself.
    """
    super().__init__('hca:DatalakeInfra', name, None, opts)

    aws_region = pulumi.Config('aws').get('region')

    # Bug fix: the condition used to read `tags is None`, which discarded
    # caller-supplied tags (replacing them with {}) and kept None otherwise.
    self.tags = tags if tags is not None else {}

    identity = get_caller_identity()

    self.kms_key = kms.Key(
        f"{name}-kms-key",
        description="kms key for encryption of datalake",
        policy=key_policy(identity.account_id, aws_region),
        tags=self.tags,
        opts=pulumi.ResourceOptions(parent=self))

    # The alias is not referenced again, so it is not bound to a name.
    kms.Alias(
        f"{name}-kms-key-alias",
        target_key_id=self.kms_key.id,
        name=f"alias/hca/{name}",
        opts=pulumi.ResourceOptions(parent=self,
                                    delete_before_replace=True))

    def kms_encryption():
        # Default SSE-KMS rule shared by all three buckets; a fresh dict is
        # returned per call so no bucket shares a mutable input.
        return {
            'rule': {
                'applyServerSideEncryptionByDefault': {
                    'kmsMasterKeyId': self.kms_key.arn,
                    'sseAlgorithm': 'aws:kms'
                }
            }
        }

    # create datalake bucket
    self.datalake_bucket = s3.Bucket(
        f"{name}-bucket",
        lifecycle_rules=datalake_lifecycle_rules(),
        server_side_encryption_configuration=kms_encryption(),
        versioning={'enabled': True},
        tags=self.tags,
        opts=pulumi.ResourceOptions(parent=self))

    s3.BucketPolicy(
        f"{name}-bucket-policy",
        bucket=self.datalake_bucket,
        policy=pulumi.Output.all(
            self.datalake_bucket.bucket,
            self.kms_key.arn).apply(lambda p: bucket_policy(p[0], p[1])),
        opts=pulumi.ResourceOptions(parent=self))

    s3.BucketPublicAccessBlock(
        f"{name}-access-block",
        bucket=self.datalake_bucket,
        block_public_acls=True,
        block_public_policy=True,
        ignore_public_acls=True,
        restrict_public_buckets=True,
        opts=pulumi.ResourceOptions(parent=self))

    # define folder paths for datalake bucket
    self.raw_location = self.datalake_bucket.bucket.apply(
        lambda b: f"s3://{b}/raw")
    self.mart_location = self.datalake_bucket.bucket.apply(
        lambda b: f"s3://{b}/mart")
    self.archive_location = self.datalake_bucket.bucket.apply(
        lambda b: f"s3://{b}/archive")
    self.delta_location = self.datalake_bucket.bucket.apply(
        lambda b: f"s3://{b}/delta")

    # create fileproc bucket
    self.fileproc_bucket = s3.Bucket(
        f"{name}-fileproc-bucket",
        lifecycle_rules=fileproc_lifecycle_rules(),
        server_side_encryption_configuration=kms_encryption(),
        versioning={'enabled': True},
        tags=self.tags,
        opts=pulumi.ResourceOptions(parent=self))

    s3.BucketPolicy(
        f"{name}-fileproc-bucket-policy",
        bucket=self.fileproc_bucket,
        policy=pulumi.Output.all(
            self.fileproc_bucket.bucket,
            self.kms_key.arn).apply(
                lambda p: fileproc_bucket_policy(p[0], p[1])),
        opts=pulumi.ResourceOptions(parent=self))

    # NOTE(review): restrict_public_buckets is False here but True on the
    # datalake bucket -- confirm this difference is intentional.
    s3.BucketPublicAccessBlock(
        f"{name}-fileproc-access-block",
        bucket=self.fileproc_bucket,
        block_public_acls=True,
        block_public_policy=True,
        ignore_public_acls=True,
        restrict_public_buckets=False,
        opts=pulumi.ResourceOptions(parent=self))

    # create scripts bucket
    self.scripts_bucket = s3.Bucket(
        f"{name}-script-bucket",
        server_side_encryption_configuration=kms_encryption(),
        versioning={'enabled': True},
        tags=self.tags,
        opts=pulumi.ResourceOptions(parent=self))

    s3.BucketPolicy(
        f"{name}-script-bucket-policy",
        bucket=self.scripts_bucket,
        policy=pulumi.Output.all(
            self.scripts_bucket.bucket,
            self.kms_key.arn).apply(
                lambda p: scripts_bucket_policy(p[0], p[1])),
        opts=pulumi.ResourceOptions(parent=self))

    # NOTE(review): restrict_public_buckets is False here as well -- confirm.
    s3.BucketPublicAccessBlock(
        f"{name}-script-access-block",
        bucket=self.scripts_bucket,
        block_public_acls=True,
        block_public_policy=True,
        ignore_public_acls=True,
        restrict_public_buckets=False,
        opts=pulumi.ResourceOptions(parent=self))

    # create dataclassification policies for getobject
    # Each policy is built from a suffix of this list: the pii policy gets
    # all three entries, confidential the last two, nonsensitive the last.
    dataclassifications = ['pii', 'confidential', 'nonsensitive']

    self.policy_get_object_pii = iam.Policy(
        f"{name}-pii-policy",
        description="allow get access to pii data",
        policy=self.datalake_bucket.id.apply(
            lambda b: dataclassification_policy(b, dataclassifications)),
        path='/',
        opts=pulumi.ResourceOptions(parent=self))

    self.policy_get_object_confidential = iam.Policy(
        f"{name}-confidential-policy",
        description="allow get access to confidential data",
        policy=self.datalake_bucket.id.apply(
            lambda b: dataclassification_policy(
                b, dataclassifications[1:])),
        path='/',
        opts=pulumi.ResourceOptions(parent=self))

    self.policy_get_object_nonsensitive = iam.Policy(
        f"{name}-nonsensitive-policy",
        description="allow get access to nonsensitive data",
        policy=self.datalake_bucket.id.apply(
            lambda b: dataclassification_policy(
                b, dataclassifications[2:])),
        path='/',
        opts=pulumi.ResourceOptions(parent=self))

    # create kms policies
    self.policy_kms_full_usage = iam.Policy(
        f"{name}-iam-key-full-usage",
        description="allow encrypt/decrypt with datalake kms key",
        policy=self.kms_key.arn.apply(kms_usage_policy),
        path='/',
        opts=pulumi.ResourceOptions(parent=self))

    self.policy_kms_encrypt_only = iam.Policy(
        f"{name}-iam-key-encrypt-only",
        description="allow encrypt only with datalake kms key",
        policy=self.kms_key.arn.apply(kms_encrypt_policy),
        path='/',
        opts=pulumi.ResourceOptions(parent=self))

    # create policy for getting scripts
    self.policy_get_scripts = iam.Policy(
        f"{name}-get-scripts",
        description="allow get access glue scripts bucket",
        policy=self.scripts_bucket.bucket.apply(get_scripts_policy),
        path='/',
        opts=pulumi.ResourceOptions(parent=self))

    # get glue service policy (create custom one later)
    self.policy_glue_service = iam.Policy.get(
        f"{name}-glue-service",
        'arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole',
        opts=pulumi.ResourceOptions(parent=self))

    # create glue security config
    # use specific name as any changes will trigger replacement of resource
    self.glue_security_config = glue.SecurityConfiguration(
        f"{name}-security-config",
        name=name,
        encryption_configuration={
            'cloudwatchEncryption': {
                'cloudwatchEncryptionMode': 'SSE-KMS',
                'kms_key_arn': self.kms_key.arn
            },
            's3Encryption': {
                's3EncryptionMode': 'SSE-KMS',
                'kms_key_arn': self.kms_key.arn
            },
            'jobBookmarksEncryption': {
                'jobBookmarksEncryptionMode': 'DISABLED'
            }
        },
        opts=pulumi.ResourceOptions(parent=self))