def __init__(self, scope: core.Construct, construct_id: str, **kwargs) -> None:
    super().__init__(scope, construct_id, **kwargs)

    s3bucket = s3.Bucket(self, 'vika-yy')
    kds = data_stream.Stream(self, 'data_stream', shard_count=1)

    # Role that Firehose assumes to read from Kinesis and write to S3
    delivery_stream_role = iam.Role(
        self, 'kdfdelivery_stream_role_role',
        assumed_by=iam.ServicePrincipal('firehose.amazonaws.com'))
    delivery_stream_role.add_managed_policy(
        iam.ManagedPolicy.from_aws_managed_policy_name(
            'AmazonKinesisFullAccess'))
    delivery_stream_role.add_to_policy(
        iam.PolicyStatement(effect=iam.Effect.ALLOW,
                            resources=[s3bucket.bucket_arn],
                            actions=["s3:*"]))

    s3_dest_config = delivery_stream.CfnDeliveryStream.ExtendedS3DestinationConfigurationProperty(
        bucket_arn=s3bucket.bucket_arn,
        buffering_hints=delivery_stream.CfnDeliveryStream.BufferingHintsProperty(
            interval_in_seconds=60, size_in_m_bs=128),
        role_arn=delivery_stream_role.role_arn,
        compression_format='UNCOMPRESSED',
        s3_backup_mode='Disabled')

    stream_source_config = delivery_stream.CfnDeliveryStream.KinesisStreamSourceConfigurationProperty(
        kinesis_stream_arn=kds.stream_arn,
        role_arn=delivery_stream_role.role_arn)

    kfirehose = delivery_stream.CfnDeliveryStream(
        self, 'kfirehose',
        delivery_stream_name='deliverystream',
        delivery_stream_type='KinesisStreamAsSource',
        extended_s3_destination_configuration=s3_dest_config,
        kinesis_stream_source_configuration=stream_source_config)

    glue_role = iam.Role(
        self, 'glue_role',
        assumed_by=iam.ServicePrincipal('glue.amazonaws.com'))
    glue_role.add_to_policy(
        iam.PolicyStatement(effect=iam.Effect.ALLOW,
                            resources=[s3bucket.bucket_arn],
                            actions=["s3:*"]))
    glue_role.add_managed_policy(
        iam.ManagedPolicy.from_aws_managed_policy_name(
            'service-role/AWSGlueServiceRole'))

    bucket_name = s3bucket.bucket_name
    glue_crawler = glue.CfnCrawler(
        self, 'glue_crawler',
        database_name='yellow-taxis',
        role=glue_role.role_arn,
        # Glue crawler S3 targets expect an s3:// URI, not a bare bucket name
        targets={"s3Targets": [{"path": f's3://{bucket_name}/input/'}]})
def __init__(self, scope: core.Construct, common: Common, data_lake: DataLake, **kwargs) -> None:
    self.env = common.env
    super().__init__(scope, id=f'{self.env}-data-lake-raw-ingestion', **kwargs)

    name = f'firehose-{self.env}-raw-delivery-stream'
    raw_bucket = data_lake.data_lake_raw_bucket
    kinesis_role = RawKinesisRole(self, environment=common.env, raw_bucket=raw_bucket)

    s3_config = firehose.CfnDeliveryStream.ExtendedS3DestinationConfigurationProperty(
        bucket_arn=raw_bucket.bucket_arn,
        compression_format='ZIP',
        error_output_prefix='bad_records',
        prefix='atomic_events/year=!{timestamp:yyyy}/'
               'month=!{timestamp:MM}/day=!{timestamp:dd}/hour=!{timestamp:HH}/',
        buffering_hints=firehose.CfnDeliveryStream.BufferingHintsProperty(
            interval_in_seconds=60, size_in_m_bs=1),
        role_arn=kinesis_role.role_arn)

    self.atomic_events = firehose.CfnDeliveryStream(
        self, id=name,
        delivery_stream_name=name,
        delivery_stream_type='DirectPut',
        extended_s3_destination_configuration=s3_config)

    self.dms_replication_task = OrdersDMS(self, common, data_lake)
def event_streams(self, bucket, event_recorder, event_sources):
    stream_role = _iam.Role(
        self, "StreamRole",
        assumed_by=_iam.ServicePrincipal('firehose.amazonaws.com'))
    bucket.grant_write(stream_role)

    # One DirectPut delivery stream per event source, each writing to its own prefix
    event_streams = []
    for source in event_sources:
        event_streams.append(
            _kfh.CfnDeliveryStream(
                self, "{}Stream".format(source.capitalize()),
                delivery_stream_name=source,
                delivery_stream_type='DirectPut',
                extended_s3_destination_configuration=_kfh.CfnDeliveryStream.ExtendedS3DestinationConfigurationProperty(
                    bucket_arn=bucket.bucket_arn,
                    buffering_hints=_kfh.CfnDeliveryStream.BufferingHintsProperty(
                        interval_in_seconds=60, size_in_m_bs=10),
                    compression_format='GZIP',
                    role_arn=stream_role.role_arn,
                    prefix="{}/".format(source))))
    return event_streams
def create_firehose(
        self, delivery_bucket,
        firehose_role_arn) -> aws_kinesisfirehose.CfnDeliveryStream:
    """
    Creates a Firehose delivery stream configured to deliver to the S3 bucket
    `delivery_bucket` and to log errors to the log group and stream named by
    `FirehoseStack.LOG_GROUP_NAME` and `FirehoseStack.LOG_STREAM_NAME`.
    Firehose will assume the role specified in `firehose_role_arn`.

    :param delivery_bucket: The delivery destination bucket for the Firehose
    :param firehose_role_arn: The ARN of the role to assume
    :return: a CfnDeliveryStream
    """
    firehose = aws_kinesisfirehose.CfnDeliveryStream(
        self, "integ_test_firehose",
        extended_s3_destination_configuration={
            "bucketArn": delivery_bucket.bucket_arn,
            "bufferingHints": {
                "intervalInSeconds": 60,
                "sizeInMBs": 50
            },
            "compressionFormat": "ZIP",
            "roleArn": firehose_role_arn,
            "cloudWatchLoggingOptions": {
                "enabled": True,
                "logGroupName": FirehoseStack.LOG_GROUP_NAME,
                "logStreamName": FirehoseStack.LOG_STREAM_NAME,
            },
        },
    )
    return firehose
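# A minimal call-site sketch for create_firehose(); the bucket, the role, and
# the module aliases (aws_s3, aws_iam) are assumptions, not from the original stack.
bucket = aws_s3.Bucket(self, "integ-test-delivery-bucket")
role = aws_iam.Role(
    self, "integ-test-firehose-role",
    assumed_by=aws_iam.ServicePrincipal("firehose.amazonaws.com"))
bucket.grant_write(role)  # Firehose also needs CloudWatch Logs permissions in practice
stream = self.create_firehose(bucket, role.role_arn)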
def __init__(self, scope: cdk.Construct, construct_id: str, **kwargs) -> None:
    super().__init__(scope, construct_id, **kwargs)

    # Cross-stack values exported by the core SIEM stack
    # (note: 'sime-log-bucket-name' is the literal export name used there)
    log_bucket_name = cdk.Fn.import_value('sime-log-bucket-name')
    role_name_cwl_to_kdf = cdk.Fn.import_value('siem-cwl-to-kdf-role-name')
    role_name_kdf_to_s3 = cdk.Fn.import_value('siem-kdf-to-s3-role-name')

    kdf_ad_name = cdk.CfnParameter(
        self, 'KdfAdName',
        description='Kinesis Data Firehose Name to deliver AD event',
        default='siem-ad-event-to-s3')
    kdf_buffer_size = cdk.CfnParameter(
        self, 'KdfBufferSize', type='Number',
        description='Enter a buffer size between 1 - 128 (MiB)',
        default=1, min_value=1, max_value=128)
    kdf_buffer_interval = cdk.CfnParameter(
        self, 'KdfBufferInterval', type='Number',
        description='Enter a buffer interval between 60 - 900 (seconds)',
        default=60, min_value=60, max_value=900)
    cwl_ad_name = cdk.CfnParameter(
        self, 'CwlAdName',
        description='CloudWatch Logs group name',
        default='/aws/directoryservice/d-XXXXXXXXXXXXXXXXX')

    # CDS is an alias for aws_kinesisfirehose.CfnDeliveryStream
    kdf_to_s3 = aws_kinesisfirehose.CfnDeliveryStream(
        self, "KDFForAdEventLog",
        delivery_stream_name=kdf_ad_name.value_as_string,
        s3_destination_configuration=CDS.S3DestinationConfigurationProperty(
            bucket_arn=f'arn:aws:s3:::{log_bucket_name}',
            prefix=f'AWSLogs/{cdk.Aws.ACCOUNT_ID}/DirectoryService/MicrosoftAD/',
            buffering_hints=CDS.BufferingHintsProperty(
                interval_in_seconds=kdf_buffer_interval.value_as_number,
                size_in_m_bs=kdf_buffer_size.value_as_number),
            compression_format='UNCOMPRESSED',
            role_arn=(f'arn:aws:iam::{cdk.Aws.ACCOUNT_ID}:role/'
                      f'service-role/{role_name_kdf_to_s3}')))

    # Subscribe the CloudWatch Logs group to the delivery stream
    aws_logs.CfnSubscriptionFilter(
        self, 'KinesisSubscription',
        destination_arn=kdf_to_s3.attr_arn,
        filter_pattern='',
        log_group_name=cwl_ad_name.value_as_string,
        role_arn=(f'arn:aws:iam::{cdk.Aws.ACCOUNT_ID}:role/'
                  f'{role_name_cwl_to_kdf}'))
def __init__(self, scope: cdk.Construct, construct_id: str,
             default_firehose_name='siem-XXXXXXXXXXX-to-s3',
             firehose_compression_format='UNCOMPRESSED',
             **kwargs) -> None:
    super().__init__(scope, construct_id, **kwargs)

    log_bucket_name = cdk.Fn.import_value('sime-log-bucket-name')
    role_name_kdf_to_s3 = cdk.Fn.import_value('siem-kdf-to-s3-role-name')

    kdf_name = cdk.CfnParameter(
        self, 'FirehoseName',
        description=('New Kinesis Data Firehose name to deliver logs. '
                     'Replace the XXXXXXXXX part'),
        default=default_firehose_name)
    kdf_buffer_size = cdk.CfnParameter(
        self, 'FirehoseBufferSize', type='Number',
        description='Enter a buffer size between 1 - 128 (MiB)',
        default=1, min_value=1, max_value=128)
    kdf_buffer_interval = cdk.CfnParameter(
        self, 'FirehoseBufferInterval', type='Number',
        description='Enter a buffer interval between 60 - 900 (seconds)',
        default=60, min_value=60, max_value=900)
    s3_destination_prefix = cdk.CfnParameter(
        self, 'S3DestPrefix',
        description='S3 destination prefix',
        default='AWSLogs/YourAccountId/LogType/Region/')

    # CDS is an alias for aws_kinesisfirehose.CfnDeliveryStream
    self.kdf_to_s3 = aws_kinesisfirehose.CfnDeliveryStream(
        self, "Kdf",
        delivery_stream_name=kdf_name.value_as_string,
        s3_destination_configuration=CDS.S3DestinationConfigurationProperty(
            bucket_arn=f'arn:aws:s3:::{log_bucket_name}',
            prefix=s3_destination_prefix.value_as_string,
            buffering_hints=CDS.BufferingHintsProperty(
                interval_in_seconds=kdf_buffer_interval.value_as_number,
                size_in_m_bs=kdf_buffer_size.value_as_number),
            compression_format=firehose_compression_format,
            role_arn=(f'arn:aws:iam::{cdk.Aws.ACCOUNT_ID}:role/'
                      f'service-role/{role_name_kdf_to_s3}')))
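# Hypothetical instantiation of the parameterized stack above; the class and
# construct names here are illustrative, not from the original project.
app = cdk.App()
FirehoseExporterStack(  # illustrative class name for the stack defined above
    app, 'siem-log-exporter-cloudtrail',
    default_firehose_name='siem-cloudtrail-to-s3',
    firehose_compression_format='GZIP')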
def __init__(self, scope: core.Construct, id: str, props: FirehoseProps, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    # Short aliases for the nested CfnDeliveryStream property classes
    Stream = firehose.CfnDeliveryStream
    ExtendedS3DestConfProp = Stream.ExtendedS3DestinationConfigurationProperty
    FormatConversionProp = Stream.DataFormatConversionConfigurationProperty
    InputFormatConfProp = Stream.InputFormatConfigurationProperty
    OutputFormatConfProp = Stream.OutputFormatConfigurationProperty
    DeserializerProperty = Stream.DeserializerProperty
    SerializerProperty = Stream.SerializerProperty
    OpenXJsonSerDeProperty = Stream.OpenXJsonSerDeProperty
    ParquetSerDeProperty = Stream.ParquetSerDeProperty
    BufferingHintsProp = Stream.BufferingHintsProperty
    SchemaConfigProp = Stream.SchemaConfigurationProperty
    SourceStreamProp = Stream.KinesisStreamSourceConfigurationProperty

    iam_role_firehose_analytical = props.role

    # Convert incoming JSON (OpenX SerDe) to Parquet using the Glue table schema
    self.delivery_stream = firehose.CfnDeliveryStream(
        self, 'deliveryClickstream',
        delivery_stream_name='deliveryClickStream',
        delivery_stream_type='KinesisStreamAsSource',
        kinesis_stream_source_configuration=SourceStreamProp(
            kinesis_stream_arn=props.stream.stream_arn,
            role_arn=iam_role_firehose_analytical.role_arn),
        extended_s3_destination_configuration=ExtendedS3DestConfProp(
            bucket_arn=props.bucket.bucket_arn,
            role_arn=iam_role_firehose_analytical.role_arn,
            buffering_hints=BufferingHintsProp(
                interval_in_seconds=60,
                size_in_m_bs=128,
            ),
            data_format_conversion_configuration=FormatConversionProp(
                enabled=True,
                input_format_configuration=InputFormatConfProp(
                    deserializer=DeserializerProperty(
                        open_x_json_ser_de=OpenXJsonSerDeProperty(),
                    ),
                ),
                output_format_configuration=OutputFormatConfProp(
                    serializer=SerializerProperty(
                        parquet_ser_de=ParquetSerDeProperty(
                            compression='UNCOMPRESSED',
                            enable_dictionary_compression=False,
                        ),
                    )),
                schema_configuration=SchemaConfigProp(
                    database_name=props.glue_db.database_name,
                    table_name=props.glue_table.table_name,
                    role_arn=iam_role_firehose_analytical.role_arn,
                )),
            prefix='kinesis/'),
    )
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    ### Code for FirehoseStack
    # Get the role/domain ARNs exported by the ddbqiIam stack
    rolearn = core.Fn.import_value("rolearn")
    esarn = core.Fn.import_value("esarn")

    # S3 bucket for records that fail delivery to Elasticsearch
    log_s3 = s3.Bucket(self, constants["S3_BUCKET_NAME"])

    s3_config = {
        "bucketArn": log_s3.bucket_arn,
        "roleArn": rolearn
    }

    es_dest_config = {
        "domainArn": esarn,
        "indexName": constants["DDBES_INDEX_NAME"],
        "roleArn": rolearn,
        "s3Configuration": s3_config,
        "bufferingHints": {
            "intervalInSeconds": 60,
            "sizeInMBs": 1
        },
    }

    self.firehose_deliverySystem = afh.CfnDeliveryStream(
        self, "ddbqiStream",
        delivery_stream_name=constants["FH_DELIVERY_STREAM_NAME"],
        delivery_stream_type="DirectPut",
        elasticsearch_destination_configuration=es_dest_config)

    core.Tags.of(self.firehose_deliverySystem).add(
        "project", constants["PROJECT_TAG"])

    core.CfnOutput(
        self, 'StreamName',
        export_name="streamName",
        value=constants["FH_DELIVERY_STREAM_NAME"],
        description="firehose stream name",
    )
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    # Create bucket
    bucket = s3.Bucket(self, "cdk-firehose-bucket")

    # IAM role for Firehose
    firehose_role = iam.Role(
        self, "FirehoseRole",
        assumed_by=iam.ServicePrincipal(service="firehose.amazonaws.com"))

    delivery_policy = iam.Policy(
        self, "FirehosePolicy",
        policy_name="FirehosePolicy",
        statements=[
            iam.PolicyStatement(
                effect=iam.Effect.ALLOW,
                actions=[
                    "s3:AbortMultipartUpload", "s3:GetBucketLocation",
                    "s3:GetObject", "s3:ListBucket",
                    "s3:ListBucketMultipartUploads", "s3:PutObject"
                ],
                resources=[bucket.bucket_arn, bucket.bucket_arn + "/*"])
        ])
    delivery_policy.attach_to_role(firehose_role)

    # Firehose stream (only one destination configuration may be set, so the
    # unused elasticsearch_destination_configuration=None is omitted)
    delivery_stream = firehose.CfnDeliveryStream(
        self, "QueueingStream",
        delivery_stream_name="QueueingStream",
        s3_destination_configuration={
            "bucketArn": bucket.bucket_arn,
            "roleArn": firehose_role.role_arn
        })
    # delivery_stream.add_depends_on(firehose_role)

    # Expose the stream's name and ARN on the object
    self._delivery_stream_name = delivery_stream.delivery_stream_name
    self._delivery_stream_arn = delivery_stream.attr_arn
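# A producer still needs permission to write to the stream. A minimal sketch,
# assuming a hypothetical `producer_fn` Lambda defined elsewhere in the stack:
producer_fn.add_to_role_policy(iam.PolicyStatement(
    effect=iam.Effect.ALLOW,
    actions=["firehose:PutRecord", "firehose:PutRecordBatch"],
    resources=[delivery_stream.attr_arn]))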
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)

    # Network
    self.vpc = ec2.Vpc(self, 'logging-vpc')
    self.backup_bucket = s3.Bucket(self, 'logging-backup',
                                   bucket_name='logging-backup-bucket')
    self.elastic_domain = es.CfnDomain(self, 'logging-es-cluster')

    # Firehose assumes this role to write to Elasticsearch and the backup bucket
    self.stream_role = iam.Role(
        self, 'logging-stream-role',
        assumed_by=iam.ServicePrincipal('firehose.amazonaws.com'))
    self.backup_bucket.grant_write(self.stream_role)

    # The destination arguments must be configuration properties, not the
    # constructs themselves; the S3 backup config is nested inside the ES config.
    self.stream = firehose.CfnDeliveryStream(
        self, 'logging-stream',
        delivery_stream_name='logging-stream-firehose',
        delivery_stream_type='DirectPut',
        elasticsearch_destination_configuration={
            'indexName': 'logs',  # placeholder index name
            'domainArn': self.elastic_domain.attr_arn,
            'roleArn': self.stream_role.role_arn,
            's3Configuration': {
                'bucketArn': self.backup_bucket.bucket_arn,
                'roleArn': self.stream_role.role_arn,
            },
        })
def __init__(
    self,
    scope: core.Construct,
    data_lake_raw_bucket: BaseDataLakeBucket,
    **kwargs,
) -> None:
    self.deploy_env = active_environment
    self.data_lake_raw_bucket = data_lake_raw_bucket
    super().__init__(scope, id=f"{self.deploy_env.value}-kinesis-stack", **kwargs)

    # self.s3_config is expected to be provided elsewhere on this class
    self.atomic_events = firehose.CfnDeliveryStream(
        self,
        id=f"firehose-{self.deploy_env.value}-raw-delivery-stream",
        delivery_stream_name=f"firehose-{self.deploy_env.value}-raw-delivery-stream",
        delivery_stream_type="DirectPut",
        extended_s3_destination_configuration=self.s3_config,
    )
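# The s3_config attribute referenced above is defined elsewhere on this class.
# A plausible sketch, assuming a delivery role attribute (self.kinesis_role);
# the buffering values, compression format, and role attribute are assumptions:
@property
def s3_config(self):
    return firehose.CfnDeliveryStream.ExtendedS3DestinationConfigurationProperty(
        bucket_arn=self.data_lake_raw_bucket.bucket_arn,  # from the constructor
        buffering_hints=firehose.CfnDeliveryStream.BufferingHintsProperty(
            interval_in_seconds=60, size_in_m_bs=1),  # assumed buffering
        compression_format="GZIP",  # assumed
        role_arn=self.kinesis_role.role_arn)  # hypothetical role attribute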
def base_kinesis_firehose_delivery_stream(construct, **kwargs):
    # TODO: ADD ROLES, BUCKETS, AND FIREHOSE MINIMUM SETTINGS
    """
    Function that generates a Kinesis Firehose delivery stream.
    :param construct: Custom construct that will use this function. From the
        external construct it is usually 'self'.
    :param kwargs: Requires 'stream_name' and 'destinations' (the latter is
        passed to firehose_destinations()).
    :return: The ARN of the created delivery stream.
    """
    stream_name = construct.prefix + "_" + kwargs["stream_name"] + "_" + "stream" + "_" + construct.environment_
    destinations_config = firehose_destinations(kwargs["destinations"])
    firehose_stream = fh_stream.CfnDeliveryStream(
        construct,
        id=stream_name,
        delivery_stream_name=stream_name,
        elasticsearch_destination_configuration=destinations_config["elasticsearch_destination_configuration"],
        extended_s3_destination_configuration=destinations_config["extended_s3_destination_configuration"],
        redshift_destination_configuration=destinations_config["redshift_destination_configuration"],
        s3_destination_configuration=destinations_config["s3_destination_configuration"],
        splunk_destination_configuration=destinations_config["splunk_destination_configuration"],
    )
    return firehose_stream.attr_arn
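# Hypothetical usage of base_kinesis_firehose_delivery_stream(); the construct
# attributes (prefix, environment_) and the shape of the destinations payload
# are whatever firehose_destinations() expects -- all names here are illustrative:
stream_arn = base_kinesis_firehose_delivery_stream(
    self,
    stream_name="clickstream",
    destinations={"s3": my_bucket})  # illustrative destinations payload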
def __init__(self, scope: Construct, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    #
    # Producer Lambda
    #
    event_producer_lambda = _lambda.Function(
        self, "eventProducerLambda",
        runtime=_lambda.Runtime.PYTHON_3_8,
        handler="event_producer_lambda.lambda_handler",
        code=_lambda.Code.from_asset("lambda"))

    event_policy = iam.PolicyStatement(
        effect=iam.Effect.ALLOW,
        resources=['*'],
        actions=['events:PutEvents'])
    event_producer_lambda.add_to_role_policy(event_policy)

    #
    # Approved Consumer1
    #
    event_consumer1_lambda = _lambda.Function(
        self, "eventConsumer1Lambda",
        runtime=_lambda.Runtime.PYTHON_3_8,
        handler="event_consumer_lambda.lambda_handler",
        code=_lambda.Code.from_asset("lambda"))

    event_consumer1_rule = events.Rule(
        self, 'eventConsumer1LambdaRule',
        description='Approved Transactions',
        event_pattern=events.EventPattern(source=['com.mycompany.myapp']))
    event_consumer1_rule.add_target(
        targets.LambdaFunction(handler=event_consumer1_lambda))

    #
    # Approved Consumer2
    #
    event_consumer2_lambda = _lambda.Function(
        self, "eventConsumer2Lambda",
        runtime=_lambda.Runtime.PYTHON_3_8,
        handler="event_consumer_lambda.lambda_handler",
        code=_lambda.Code.from_asset("lambda"))

    event_consumer2_rule = events.Rule(
        self, 'eventConsumer2LambdaRule',
        description='Approved Transactions',
        event_pattern=events.EventPattern(source=['com.mycompany.myapp']))
    event_consumer2_rule.add_target(
        targets.LambdaFunction(handler=event_consumer2_lambda))

    #
    # Approved Consumer3
    #

    # Create S3 bucket for the Kinesis Firehose destination
    ingest_bucket = s3.Bucket(self, 'test-ingest-bucket')

    # Create a role for Kinesis Firehose
    firehose_role = iam.Role(
        self, 'myRole',
        assumed_by=iam.ServicePrincipal('firehose.amazonaws.com'))

    # Create and attach a policy that allows writing into the S3 bucket
    iam.Policy(
        self, 's3_attr',
        policy_name='s3kinesis',
        statements=[iam.PolicyStatement(
            actions=['s3:*'],
            resources=['arn:aws:s3:::' + ingest_bucket.bucket_name + '/*'])],
        roles=[firehose_role],
    )

    event_consumer3_kinesisfirehose = _firehose.CfnDeliveryStream(
        self, "consumer3-firehose",
        s3_destination_configuration=_firehose.CfnDeliveryStream.S3DestinationConfigurationProperty(
            bucket_arn=ingest_bucket.bucket_arn,
            buffering_hints=_firehose.CfnDeliveryStream.BufferingHintsProperty(
                interval_in_seconds=60),
            compression_format="UNCOMPRESSED",
            role_arn=firehose_role.role_arn))

    event_consumer3_rule = events.Rule(
        self, 'eventConsumer3KinesisRule',
        description='Approved Transactions',
        event_pattern=events.EventPattern(source=['com.mycompany.myapp']))
    event_consumer3_rule.add_target(
        targets.KinesisFirehoseStream(stream=event_consumer3_kinesisfirehose))

    # Defines an API Gateway REST API resource backed by the event producer Lambda
    api = api_gw.LambdaRestApi(
        self, 'SampleAPI-EventBridge-Multi-Consumer',
        handler=event_producer_lambda,
        proxy=False)

    items = api.root.add_resource("items")
    items.add_method("POST")  # POST /items
def __init__(self, scope: cdk.Construct, id: str, name: str, vpc_name: str,
             security_group_name: str, secrets_path: str = "/ibc/paper/",
             trading_mode: str = "paper", **kwargs) -> None:
    super().__init__(scope, id, **kwargs)  # was `*kwargs`; kwargs must be unpacked with **

    # TODO: Create Log Group

    # Create a cluster
    vpc = ec2.Vpc.from_lookup(self, "vpc", vpc_name=vpc_name)
    privateSubnets = vpc.private_subnets
    cluster = ecs.Cluster(self, "cluster", vpc=vpc)
    # TODO: check for namespace before adding below. This is failing on stack updates.
    cluster.add_default_cloud_map_namespace(name="private")

    # cpu is a number of CPU units, memory is in MiB (was cpu="512", memory_mi_b="1024")
    task = ecs.FargateTaskDefinition(self, "task", cpu=512, memory_limit_mib=1024)

    # Add SSM permissions to the task IAM role
    SSM_ACTIONS = ["ssm:GetParametersByPath", "kms:Decrypt"]
    SSM_RESOURCES = [
        "arn:aws:kms:*:*:alias/aws/ssm",
        "arn:aws:ssm:*:*:parameter{}*".format(secrets_path),
    ]
    ssmPolicy = iam.PolicyStatement(effect=iam.Effect.ALLOW,
                                    actions=SSM_ACTIONS,
                                    resources=SSM_RESOURCES)
    task.add_to_task_role_policy(ssmPolicy)

    ibcRepo = ecr.Repository.from_repository_name(self, "container_repo", "ibc")
    ibcImage = ecs.ContainerImage.from_ecr_repository(ibcRepo, "latest")

    # TODO: Add to existing hierarchical logger, add log_group argument with ref to it
    ibcLogger = ecs.AwsLogDriver(stream_prefix=name)

    connectionLossMetric = logs.MetricFilter(
        self, "connectionLossMetric",
        filter_pattern=logs.FilterPattern.literal("ERROR ?110 ?130"),
        log_group=ibcLogger.log_group,
        metric_name="ib_connection_loss",
        metric_namespace=name,
    )
    newContainerMetric = logs.MetricFilter(
        self, "newContainerMetric",
        filter_pattern=logs.FilterPattern.literal("Starting virtual X frame buffer"),
        log_group=ibcLogger.log_group,
        metric_name="new_container",
        metric_namespace=name,
    )

    kinesisFirehoseBucket = s3.Bucket(self, "firehoseBucket")
    kinesisFirehoseBucketPolicy = iam.PolicyStatement(
        effect=iam.Effect.ALLOW,
        actions=[
            "s3:AbortMultipartUpload",
            "s3:GetBucketLocation",
            "s3:GetObject",
            "s3:ListBucket",
            "s3:ListBucketMultipartUploads",
        ],
        resources=[
            kinesisFirehoseBucket.bucket_arn,
            kinesisFirehoseBucket.bucket_arn + "/*",
        ])

    kinesisFirehoseBucketRole = iam.Role(
        self, "kinesisFirehoseBucketRole",
        assumed_by=iam.ServicePrincipal("firehose.amazonaws.com"),
        path="/service/" + name + "/",
    )
    kinesisFirehoseBucketRole.add_to_policy(kinesisFirehoseBucketPolicy)

    kinesisFirehose = firehose.CfnDeliveryStream(
        self, "firehose",
        delivery_stream_name=name,
        delivery_stream_type="DirectPut",
        s3_destination_configuration={
            "bucketArn": kinesisFirehoseBucket.bucket_arn,
            "bufferingHints": {
                "intervalInSeconds": 10 * 60,
                "sizeInMBs": 16
            },
            "compressionFormat": "GZIP",
            "roleArn": kinesisFirehoseBucketRole.role_arn,
        },
    )

    # Add Firehose permissions to the task IAM role
    # (a CfnDeliveryStream exposes its ARN as attr_arn)
    FIREHOSE_ACTIONS = ["firehose:PutRecord", "firehose:PutRecordBatch"]
    firehosePolicy = iam.PolicyStatement(effect=iam.Effect.ALLOW,
                                         actions=FIREHOSE_ACTIONS,
                                         resources=[kinesisFirehose.attr_arn])
    task.add_to_task_role_policy(firehosePolicy)

    environment = {
        "SECRETS_PATH": secrets_path,
        "TWS_LIVE_PAPER": trading_mode,
        "FIREHOSE_STREAM_NAME": kinesisFirehose.delivery_stream_name,
    }
    ibcContainer = ecs.ContainerDefinition(
        self, "container",
        task_definition=task,
        image=ibcImage,
        environment=environment,
        logging=ibcLogger,
        essential=True,
    )

    securityGroup = ec2.SecurityGroup.from_security_group_id(
        self, "task_security_group", security_group_id=security_group_name)
    ibcService = ecs.FargateService(
        self, "fargate_service",
        cluster=cluster,
        task_definition=task,
        assign_public_ip=False,
        desired_count=1,
        security_group=securityGroup,
        cloud_map_options=ecs.CloudMapOptions(name=name),  # ServiceDiscoveryOptions in older CDK
        service_name=name,
        vpc_subnets=ec2.SubnetSelection(subnets=privateSubnets),
    )
def __init__(self, scope: core.Construct, id: str, is_qa_stack=False, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    def qa_maybe(id_str: str) -> str:
        return id_str if not is_qa_stack else id_str + '-qa'

    # Bucket used to deliver events
    delivery_bucket = aws_s3.Bucket(
        self, id=qa_maybe('my-event-storage-bucket'),
        bucket_name=qa_maybe('my-event-storage-bucket'),
        block_public_access=aws_s3.BlockPublicAccess.BLOCK_ALL)

    # ---- Below is firehose related code ----
    # Firehose has no high-level CDK construct yet, so the role is defined by hand
    role = aws_iam.Role(
        self, id=qa_maybe('my-firehose-delivery-role'),
        assumed_by=aws_iam.ServicePrincipal('firehose.amazonaws.com'))
    delivery_bucket.grant_write(role)

    # Resources without high-level support are declared through their Cfn (CloudFormation) objects
    firehose = aws_kinesisfirehose.CfnDeliveryStream(
        self, id=qa_maybe('my-pipeline-firehose'),
        delivery_stream_name=qa_maybe('my-pipeline-firehose'),
        delivery_stream_type='DirectPut',
        s3_destination_configuration={
            'bucketArn': delivery_bucket.bucket_arn,
            'bufferingHints': {
                'intervalInSeconds': 900,  # 900 is the maximum for Firehose
                'sizeInMBs': 5
            },
            'compressionFormat': 'UNCOMPRESSED',
            'prefix': 'events/',  # The folder the events will end up in
            'errorOutputPrefix': 'delivery_error/',  # Folder in case of delivery error
            'roleArn': role.role_arn
        })

    # Policy statement that lets a producer put records on the Firehose stream
    firehose_policy = aws_iam.PolicyStatement(
        actions=['firehose:DescribeDeliveryStream', 'firehose:PutRecord'],
        effect=aws_iam.Effect.ALLOW,
        resources=[firehose.attr_arn])

    # ---- API GW + Lambda code ----
    # Lambda that receives the data messages and forwards them to Firehose
    api_lambda = aws_lambda.Function(
        self, id=qa_maybe('my-api-gw-lambda'),
        runtime=aws_lambda.Runtime.PYTHON_3_8,
        code=aws_lambda.Code.asset('src/lambda_code/api_gw_lambda'),
        handler='main.handler',
        memory_size=128,
        timeout=core.Duration.seconds(5),
        environment={
            'region': self.region,
            'stream_name': firehose.delivery_stream_name
        })
    # Important: attach the Firehose PutRecord policy to the Lambda, otherwise there will be access errors
    api_lambda.add_to_role_policy(firehose_policy)

    # API Gateway fronting the Lambda
    api_gw = aws_apigateway.LambdaRestApi(
        self, id=qa_maybe('my-api-gw'),
        handler=api_lambda,
        proxy=False,
        deploy_options=aws_apigateway.StageOptions(
            stage_name='qa' if is_qa_stack else 'prod'))

    # Add API query method
    api_gw.root.add_resource('send_data').add_method('GET', api_key_required=True)

    # Generate an API key and add it to a usage plan
    api_key = api_gw.add_api_key(qa_maybe('MyPipelinePublicKey'))
    usage_plan = api_gw.add_usage_plan(
        id=qa_maybe('my-pipeline-usage-plan'),
        name='standard',
        api_key=api_key,
        throttle=aws_apigateway.ThrottleSettings(rate_limit=10, burst_limit=2))

    # Add the usage plan to the API GW
    usage_plan.add_api_stage(stage=api_gw.deployment_stage)
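# The handler at src/lambda_code/api_gw_lambda is not shown. A minimal sketch
# of what main.handler might look like, assuming it forwards the query string
# as one JSON record to the stream named in the environment (the payload shape
# is an assumption; the boto3 calls are standard):
import json
import os

import boto3

firehose_client = boto3.client('firehose')

def handler(event, context):
    # Forward the query string parameters as a newline-terminated JSON record
    record = json.dumps(event.get('queryStringParameters') or {}) + '\n'
    firehose_client.put_record(
        DeliveryStreamName=os.environ['stream_name'],
        Record={'Data': record.encode('utf-8')})
    return {'statusCode': 200, 'body': 'ok'}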
def __init__(self, scope: cdk.Construct, construct_id: str, **kwargs) -> None:
    super().__init__(scope, construct_id, **kwargs)

    log_bucket_name = cdk.Fn.import_value('sime-log-bucket-name')
    service_role_kdf_to_s3 = cdk.Fn.import_value('siem-kdf-to-s3-role-name')

    cwe_frequency = cdk.CfnParameter(
        self, 'cweRulesFrequency', type='Number',
        description='How often do you get WorkSpaces Inventory? (in minutes)',
        default=720)
    kdf_workspaces_name = cdk.CfnParameter(
        self, 'KdfWorkSpacesName',
        description='Kinesis Data Firehose Name to deliver workspaces event',
        default='siem-workspaces-event-to-s3')
    kdf_buffer_size = cdk.CfnParameter(
        self, 'KdfBufferSize', type='Number',
        description='Enter a buffer size between 1 - 128 (MiB)',
        default=1, min_value=1, max_value=128)
    kdf_buffer_interval = cdk.CfnParameter(
        self, 'KdfBufferInterval', type='Number',
        description='Enter a buffer interval between 60 - 900 (seconds)',
        default=60, min_value=60, max_value=900)

    role_get_workspaces_inventory = aws_iam.Role(
        self, 'getWorkspacesInventoryRole',
        role_name='siem-get-workspaces-inventory-role',
        inline_policies={
            'describe-workspaces': aws_iam.PolicyDocument(statements=[
                aws_iam.PolicyStatement(
                    actions=['workspaces:Describe*'],
                    resources=['*'],
                    sid='DescribeWorkSpacesPolicyGeneratedBySeimCfn')
            ]),
            'firehose-to-s3': aws_iam.PolicyDocument(statements=[
                aws_iam.PolicyStatement(
                    actions=['s3:PutObject'],
                    resources=[f'arn:aws:s3:::{log_bucket_name}/*'],
                    sid='FirehoseToS3PolicyGeneratedBySeimCfn')
            ])
        },
        managed_policies=[
            aws_iam.ManagedPolicy.from_aws_managed_policy_name(
                'service-role/AWSLambdaBasicExecutionRole'),
        ],
        assumed_by=aws_iam.ServicePrincipal('lambda.amazonaws.com'))

    # Lambda function to get the WorkSpaces inventory
    lambda_func = aws_lambda.Function(
        self, 'lambdaGetWorkspacesInventory',
        runtime=aws_lambda.Runtime.PYTHON_3_8,
        code=aws_lambda.InlineCode(LAMBDA_GET_WORKSPACES_INVENTORY),
        function_name='siem-get-workspaces-inventory',
        description='SIEM: get workspaces inventory',
        handler='index.lambda_handler',
        timeout=cdk.Duration.seconds(300),
        role=role_get_workspaces_inventory,
        environment={'log_bucket_name': log_bucket_name})

    rule = aws_events.Rule(
        self, 'eventBridgeRuleWorkSpaceInventory',
        rule_name='siem-workspaces-inventory-to-lambda',
        schedule=aws_events.Schedule.rate(
            cdk.Duration.minutes(cwe_frequency.value_as_number)))
    rule.add_target(aws_events_targets.LambdaFunction(lambda_func))

    # CDS is an alias for aws_kinesisfirehose.CfnDeliveryStream
    kdf_to_s3 = aws_kinesisfirehose.CfnDeliveryStream(
        self, "KDFForWorkSpacesEvent",
        delivery_stream_name=kdf_workspaces_name.value_as_string,
        s3_destination_configuration=CDS.S3DestinationConfigurationProperty(
            bucket_arn=f'arn:aws:s3:::{log_bucket_name}',
            prefix=f'AWSLogs/{cdk.Aws.ACCOUNT_ID}/WorkSpaces/Event/',
            compression_format='GZIP',
            buffering_hints=CDS.BufferingHintsProperty(
                interval_in_seconds=kdf_buffer_interval.value_as_number,
                size_in_m_bs=kdf_buffer_size.value_as_number),
            role_arn=(f'arn:aws:iam::{cdk.Aws.ACCOUNT_ID}:role/'
                      f'service-role/{service_role_kdf_to_s3}')))

    # Route WorkSpaces access events to the delivery stream
    pattern = aws_events.EventPattern(detail_type=["WorkSpaces Access"],
                                      source=['aws.workspaces'])
    aws_events.Rule(
        self, 'eventBridgeRuleWorkSpacesEvent',
        event_pattern=pattern,
        rule_name='siem-workspaces-event-to-kdf',
        targets=[aws_events_targets.KinesisFirehoseStream(kdf_to_s3)])
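# The inline code in LAMBDA_GET_WORKSPACES_INVENTORY is defined elsewhere. An
# illustrative sketch of what it might contain, matching the role's
# workspaces:Describe* and s3:PutObject grants above (the S3 key layout here is
# an assumption):
import datetime
import json
import os

import boto3

def lambda_handler(event, context):
    ws = boto3.client('workspaces')
    workspaces = []
    paginator = ws.get_paginator('describe_workspaces')
    for page in paginator.paginate():
        workspaces.extend(page['Workspaces'])
    # Hypothetical key layout for the inventory dump
    key = 'AWSLogs/WorkSpaces/Inventory/{}.json'.format(
        datetime.datetime.utcnow().strftime('%Y-%m-%d-%H-%M'))
    boto3.client('s3').put_object(
        Bucket=os.environ['log_bucket_name'],
        Key=key,
        Body=json.dumps(workspaces, default=str))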
def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
    super().__init__(scope, construct_id, **kwargs)

    # vpc_name = self.node.try_get_context("vpc_name")
    # vpc = aws_ec2.Vpc.from_lookup(self, "ExistingVPC",
    #     is_default=True,
    #     vpc_name=vpc_name)
    vpc = aws_ec2.Vpc(
        self, "FirehoseToS3VPC",
        max_azs=2,
        gateway_endpoints={
            "S3": aws_ec2.GatewayVpcEndpointOptions(
                service=aws_ec2.GatewayVpcEndpointAwsService.S3)
        })

    S3_BUCKET_SUFFIX = ''.join(
        random.sample((string.ascii_lowercase + string.digits), k=7))
    s3_bucket = s3.Bucket(
        self, "s3bucket",
        #XXX: Default is cdk.RemovalPolicy.RETAIN - the bucket would be orphaned
        removal_policy=cdk.RemovalPolicy.DESTROY,
        bucket_name="firehose-to-s3-{region}-{suffix}".format(
            region=cdk.Aws.REGION, suffix=S3_BUCKET_SUFFIX))

    FIREHOSE_STREAM_NAME = cdk.CfnParameter(
        self, 'FirehoseStreamName', type='String',
        description='kinesis data firehose stream name',
        default='PUT-S3-{}'.format(''.join(random.sample(string.ascii_letters, k=5))))
    FIREHOSE_BUFFER_SIZE = cdk.CfnParameter(
        self, 'FirehoseBufferSize', type='Number',
        description='kinesis data firehose buffer size',
        min_value=1, max_value=128, default=128)
    FIREHOSE_BUFFER_INTERVAL = cdk.CfnParameter(
        self, 'FirehoseBufferInterval', type='Number',
        description='kinesis data firehose buffer interval',
        min_value=60, max_value=300, default=60)
    FIREHOSE_LAMBDA_BUFFER_SIZE = cdk.CfnParameter(
        self, 'FirehoseLambdaBufferSize', type='Number',
        description='kinesis data firehose buffer size for AWS Lambda to transform records',
        min_value=1, max_value=3, default=3)
    FIREHOSE_LAMBDA_BUFFER_INTERVAL = cdk.CfnParameter(
        self, 'FirehoseLambdaBufferInterval', type='Number',
        description='kinesis data firehose buffer interval for AWS Lambda to transform records',
        min_value=60, max_value=900, default=300)
    FIREHOSE_LAMBDA_NUMBER_OF_RETRIES = cdk.CfnParameter(
        self, 'FirehoseLambdaNumberOfRetries', type='Number',
        description='Number of retries for AWS Lambda to transform records in kinesis data firehose',
        min_value=1, max_value=5, default=3)
    FIREHOSE_TO_S3_PREFIX = cdk.CfnParameter(
        self, 'FirehosePrefix', type='String',
        description='kinesis data firehose S3 prefix')
    FIREHOSE_TO_S3_ERROR_OUTPUT_PREFIX = cdk.CfnParameter(
        self, 'FirehoseErrorOutputPrefix', type='String',
        description='kinesis data firehose S3 error output prefix',
        default='error/year=!{timestamp:yyyy}/month=!{timestamp:MM}/day=!{timestamp:dd}/hour=!{timestamp:HH}/!{firehose:error-output-type}')

    METADATA_EXTRACT_LAMBDA_FN_NAME = "MetadataExtractor"
    metadata_extract_lambda_fn = aws_lambda.Function(
        self, "MetadataExtractor",
        runtime=aws_lambda.Runtime.PYTHON_3_7,
        function_name="MetadataExtractor",
        handler="metadata_extractor.lambda_handler",
        description="Extract partition keys from records",
        code=aws_lambda.Code.from_asset(
            os.path.join(os.path.dirname(__file__), 'src/main/python')),
        timeout=cdk.Duration.minutes(5))

    log_group = aws_logs.LogGroup(
        self, "MetadataExtractorLogGroup",
        #XXX: A circular dependency between resources occurs
        # if aws_lambda.Function.function_name is used
        # instead of the literal lambda function name such as "MetadataExtractor"
        log_group_name="/aws/lambda/{}".format(METADATA_EXTRACT_LAMBDA_FN_NAME),
        retention=aws_logs.RetentionDays.THREE_DAYS,
        removal_policy=cdk.RemovalPolicy.DESTROY)
    log_group.grant_write(metadata_extract_lambda_fn)

    firehose_role_policy_doc = aws_iam.PolicyDocument()
    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(**{
            "effect": aws_iam.Effect.ALLOW,
            "resources": [s3_bucket.bucket_arn, "{}/*".format(s3_bucket.bucket_arn)],
            "actions": [
                "s3:AbortMultipartUpload", "s3:GetBucketLocation",
                "s3:GetObject", "s3:ListBucket",
                "s3:ListBucketMultipartUploads", "s3:PutObject"
            ]
        }))

    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            resources=["*"],
            actions=[
                "ec2:DescribeVpcs", "ec2:DescribeVpcAttribute",
                "ec2:DescribeSubnets", "ec2:DescribeSecurityGroups",
                "ec2:DescribeNetworkInterfaces", "ec2:CreateNetworkInterface",
                "ec2:CreateNetworkInterfacePermission", "ec2:DeleteNetworkInterface"
            ]))

    #XXX: https://docs.aws.amazon.com/ko_kr/cdk/latest/guide/tokens.html
    # String-encoded tokens:
    # Avoid manipulating the string in other ways. For example,
    # taking a substring of a string is likely to break the string token.
    firehose_log_group_name = f"/aws/kinesisfirehose/{FIREHOSE_STREAM_NAME.value_as_string}"
    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            #XXX: The ARN will be formatted as follows:
            # arn:{partition}:{service}:{region}:{account}:{resource}{sep}{resource-name}
            resources=[
                self.format_arn(
                    service="logs", resource="log-group",
                    resource_name="{}:log-stream:*".format(firehose_log_group_name),
                    arn_format=cdk.ArnFormat.COLON_RESOURCE_NAME)
            ],
            actions=["logs:PutLogEvents"]))

    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(**{
            "effect": aws_iam.Effect.ALLOW,
            "resources": [
                self.format_arn(
                    partition="aws", service="lambda",
                    region=cdk.Aws.REGION, account=cdk.Aws.ACCOUNT_ID,
                    resource="function",
                    resource_name="{}:*".format(metadata_extract_lambda_fn.function_name),
                    arn_format=cdk.ArnFormat.COLON_RESOURCE_NAME)
            ],
            "actions": ["lambda:InvokeFunction", "lambda:GetFunctionConfiguration"]
        }))

    firehose_role = aws_iam.Role(
        self, "KinesisFirehoseServiceRole",
        role_name="KinesisFirehoseServiceRole-{stream_name}-{region}".format(
            stream_name=FIREHOSE_STREAM_NAME.value_as_string, region=cdk.Aws.REGION),
        assumed_by=aws_iam.ServicePrincipal("firehose.amazonaws.com"),
        path='/service-role/',
        #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
        inline_policies={"firehose_role_policy": firehose_role_policy_doc})

    # cfn is an alias for aws_kinesisfirehose.CfnDeliveryStream
    lambda_proc = cfn.ProcessorProperty(
        type="Lambda",
        parameters=[
            cfn.ProcessorParameterProperty(
                parameter_name="LambdaArn",
                parameter_value='{}:{}'.format(
                    metadata_extract_lambda_fn.function_arn,
                    metadata_extract_lambda_fn.current_version.version)),
            cfn.ProcessorParameterProperty(
                parameter_name="NumberOfRetries",
                parameter_value=FIREHOSE_LAMBDA_NUMBER_OF_RETRIES.value_as_string),
            cfn.ProcessorParameterProperty(
                parameter_name="RoleArn",
                parameter_value=firehose_role.role_arn),
            cfn.ProcessorParameterProperty(
                parameter_name="BufferSizeInMBs",
                parameter_value=FIREHOSE_LAMBDA_BUFFER_SIZE.value_as_string),
            cfn.ProcessorParameterProperty(
                parameter_name="BufferIntervalInSeconds",
                parameter_value=FIREHOSE_LAMBDA_BUFFER_INTERVAL.value_as_string)
        ])

    record_deaggregation_proc = cfn.ProcessorProperty(
        type="RecordDeAggregation",
        parameters=[
            cfn.ProcessorParameterProperty(parameter_name="SubRecordType",
                                           parameter_value="JSON")
        ])

    #XXX: Add a new-line delimiter when delivering data to S3.
    # This is particularly useful when dynamic partitioning is applied to aggregated data,
    # because multi-record deaggregation (which must be applied to aggregated data
    # before it can be dynamically partitioned) removes new lines from records as part of the parsing process.
    # https://docs.aws.amazon.com/firehose/latest/dev/dynamic-partitioning.html#dynamic-partitioning-new-line-delimiter
    append_delim_to_record_proc = cfn.ProcessorProperty(
        type="AppendDelimiterToRecord",
        parameters=[])

    firehose_processing_config = cfn.ProcessingConfigurationProperty(
        enabled=True,
        processors=[
            record_deaggregation_proc, append_delim_to_record_proc, lambda_proc
        ])

    ext_s3_dest_config = cfn.ExtendedS3DestinationConfigurationProperty(
        bucket_arn=s3_bucket.bucket_arn,
        role_arn=firehose_role.role_arn,
        buffering_hints={
            "intervalInSeconds": FIREHOSE_BUFFER_INTERVAL.value_as_number,
            "sizeInMBs": FIREHOSE_BUFFER_SIZE.value_as_number
        },
        cloud_watch_logging_options={
            "enabled": True,
            "logGroupName": firehose_log_group_name,
            "logStreamName": "DestinationDelivery"
        },
        compression_format="UNCOMPRESSED",  # [GZIP | HADOOP_SNAPPY | Snappy | UNCOMPRESSED | ZIP]
        data_format_conversion_configuration={"enabled": False},
        dynamic_partitioning_configuration={
            "enabled": True,
            "retryOptions": {
                "durationInSeconds": 300
            }
        },
        error_output_prefix=FIREHOSE_TO_S3_ERROR_OUTPUT_PREFIX.value_as_string,
        prefix=FIREHOSE_TO_S3_PREFIX.value_as_string,
        processing_configuration=firehose_processing_config)

    firehose_to_s3_delivery_stream = aws_kinesisfirehose.CfnDeliveryStream(
        self, "FirehoseToS3",
        delivery_stream_name=FIREHOSE_STREAM_NAME.value_as_string,
        delivery_stream_type="DirectPut",
        extended_s3_destination_configuration=ext_s3_dest_config,
        tags=[{"key": "Name", "value": FIREHOSE_STREAM_NAME.value_as_string}])

    cdk.CfnOutput(self, 'StackName', value=self.stack_name, export_name='StackName')
    cdk.CfnOutput(self, '{}_S3DestBucket'.format(self.stack_name),
                  value=s3_bucket.bucket_name, export_name='S3DestBucket')
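# The metadata_extractor handler itself is not shown. A minimal sketch of a
# Firehose transform that emits partition keys for dynamic partitioning; the
# `event_type` field is a hypothetical partition key, while the record/response
# envelope follows the documented Firehose transform contract:
import base64
import json

def lambda_handler(event, context):
    output = []
    for record in event['records']:
        payload = json.loads(base64.b64decode(record['data']))
        output.append({
            'recordId': record['recordId'],
            'result': 'Ok',
            'data': record['data'],  # pass the record through unchanged
            'metadata': {
                'partitionKeys': {'event_type': payload.get('event_type', 'unknown')}
            }
        })
    return {'records': output}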
def __init__(self, scope: core.Stack, id: str, **kwargs):
    super().__init__(scope, id, **kwargs)

    self.output_bucket = aws_s3.Bucket(
        self, "BucketTwitterStreamOutput",
        bucket_name=self.stack_name,
    )
    self.bucket_url = self.output_bucket.bucket_regional_domain_name

    # Because the Kinesis Firehose bindings go straight to CloudFormation,
    # we have to create the IAM policy/role and attach them on our own
    self.iam_role = aws_iam.Role(
        self, "IAMRoleTwitterStreamKinesisFHToS3",
        role_name="KinesisFirehoseToS3-{}".format(self.stack_name),
        assumed_by=aws_iam.ServicePrincipal(service='firehose.amazonaws.com'),
    )

    # S3 bucket actions
    self.s3_iam_policy_statement = aws_iam.PolicyStatement()
    actions = ["s3:GetBucketLocation", "s3:GetObject", "s3:ListBucket",
               "s3:ListBucketMultipartUploads", "s3:PutObject"]
    for action in actions:
        self.s3_iam_policy_statement.add_actions(action)
    self.s3_iam_policy_statement.add_resources(self.output_bucket.bucket_arn)
    self.s3_iam_policy_statement.add_resources(self.output_bucket.bucket_arn + "/*")

    # CloudWatch error log setup
    self.s3_error_logs_group = aws_logs.LogGroup(
        self, "S3ErrorLogsGroup",
        log_group_name="{}-s3-errors".format(self.stack_name))
    self.s3_error_logs_stream = aws_logs.LogStream(
        self, "S3ErrorLogsStream",
        log_group=self.s3_error_logs_group,
        log_stream_name='s3Backup')

    self.firehose = aws_kinesisfirehose.CfnDeliveryStream(
        self, "FirehoseTwitterStream",
        delivery_stream_name="{}-raw".format(self.stack_name),
        delivery_stream_type="DirectPut",
        s3_destination_configuration={
            'bucketArn': self.output_bucket.bucket_arn,
            'bufferingHints': {
                'intervalInSeconds': 120,
                'sizeInMBs': 10
            },
            'compressionFormat': 'UNCOMPRESSED',
            'roleArn': self.iam_role.role_arn,
            'cloudWatchLoggingOptions': {
                'enabled': True,
                'logGroupName': "{}-raw".format(self.stack_name),
                'logStreamName': 's3BackupRaw'
            },
            'prefix': 'twitter-raw/'
        },
    )

    # TODO: Only attach what's needed for this policy; right now it attaches everything
    self.iam_policy = aws_iam.Policy(
        self, "IAMPolicyTwitterStreamKinesisFHToS3",
        policy_name="KinesisFirehoseToS3-{}".format(self.stack_name),
        statements=[self.s3_iam_policy_statement],
    )
    self.iam_policy.attach_to_role(self.iam_role)

    # Second DirectPut stream for the curated output, built the same way
    self.curator_firehose = aws_kinesisfirehose.CfnDeliveryStream(
        self, "CuratorFirehoseStream",
        delivery_stream_name="{}-curator".format(self.stack_name),
        delivery_stream_type="DirectPut",
        s3_destination_configuration={
            'bucketArn': self.output_bucket.bucket_arn,
            'bufferingHints': {
                'intervalInSeconds': 120,
                'sizeInMBs': 10
            },
            'compressionFormat': 'UNCOMPRESSED',
            'roleArn': self.iam_role.role_arn,
            'cloudWatchLoggingOptions': {
                'enabled': True,
                'logGroupName': "{}-curator".format(self.stack_name),
                'logStreamName': 's3BackupCurator'
            },
            'prefix': 'twitter-curated/'
        },
    )

    def zip_package():
        cwd = os.getcwd()
        file_name = 'curator-lambda.zip'
        zip_file = cwd + '/' + file_name
        os.chdir('src/')
        sh.zip('-r9', zip_file, '.')
        os.chdir(cwd)
        return file_name, zip_file

    _, zip_file = zip_package()

    self.twitter_stream_curator_lambda_function = aws_lambda.Function(
        self, "TwitterStreamCuratorLambdaFunction",
        function_name="{}-curator".format(self.stack_name),
        code=aws_lambda.AssetCode(zip_file),
        handler="sentiment_analysis.lambda_handler",
        runtime=aws_lambda.Runtime.PYTHON_3_7,
        tracing=aws_lambda.Tracing.ACTIVE,
        description="Triggers from S3 PUT event for twitter stream data and "
                    "transforms it to clean json syntax with sentiment analysis attached",
        environment={
            "STACK_NAME": self.stack_name,
            "FIREHOSE_STREAM": self.curator_firehose.delivery_stream_name
        },
        memory_size=128,
        timeout=core.Duration.seconds(120),
        log_retention=aws_logs.RetentionDays.ONE_WEEK,
    )

    # Permission to talk to Comprehend for sentiment analysis
    self.comprehend_iam_policy_statement = aws_iam.PolicyStatement()
    self.comprehend_iam_policy_statement.add_actions('comprehend:*')
    self.comprehend_iam_policy_statement.add_all_resources()
    self.twitter_stream_curator_lambda_function.add_to_role_policy(
        self.comprehend_iam_policy_statement)

    # Permission to put records on the curator Firehose
    self.curator_firehose_iam_policy_statement = aws_iam.PolicyStatement()
    self.curator_firehose_iam_policy_statement.add_actions('firehose:Put*')
    self.curator_firehose_iam_policy_statement.add_resources(
        self.curator_firehose.attr_arn)
    self.twitter_stream_curator_lambda_function.add_to_role_policy(
        self.curator_firehose_iam_policy_statement)

    # Let the curator Lambda read the raw objects and trigger on new ones
    self.output_bucket.grant_read(self.twitter_stream_curator_lambda_function)
    self.twitter_stream_curator_lambda_function.add_event_source(
        aws_lambda_event_sources.S3EventSource(
            bucket=self.output_bucket,
            events=[aws_s3.EventType.OBJECT_CREATED],
            filters=[aws_s3.NotificationKeyFilter(prefix="twitter-raw/")]
        )
    )
def __init__(self, scope: core.Construct, construct_id: str, **kwargs) -> None:
    super().__init__(scope, construct_id, **kwargs)

    kda_src_bucket_name = core.CfnParameter(
        self, "kda_src_bucket_name", type="String",
        description="The name of the Amazon S3 bucket where uploaded files will be stored.")
    kda_output_bucket_name = core.CfnParameter(
        self, "kda_output_bucket_name", type="String",
        description="The name of the Amazon S3 bucket where KDA output via Firehose will be stored.")
    sourceStreamName = core.CfnParameter(
        self, "sourceStreamName", type="String",
        description="The name of the Kinesis Data Stream.",
        default="BikeRideGenerator")
    deliveryStreamName = core.CfnParameter(
        self, "deliveryStreamName", type="String",
        description="The name of the Kinesis Firehose output stream.",
        default="BikeAnalyticsOutput")

    # Create S3 buckets
    kda_src_bucket = s3.Bucket(
        self, "kda_src_bucket",
        bucket_name=kda_src_bucket_name.value_as_string,
        versioned=False,
        removal_policy=core.RemovalPolicy.DESTROY)
    kda_output_bucket = s3.Bucket(
        self, "kda_output_bucket",
        bucket_name=kda_output_bucket_name.value_as_string,
        versioned=False,
        removal_policy=core.RemovalPolicy.DESTROY)

    # Create Kinesis source stream
    sourceStream = kds.Stream(
        self, "sourceStream",
        stream_name=sourceStreamName.value_as_string,
        shard_count=10)

    # Firehose delivery role
    fhIAMRole = iam.Role(
        self, "fhIAMRole",
        assumed_by=iam.ServicePrincipal('firehose.amazonaws.com'),
        role_name="BikeRideFirehoseDeliveryRole",
        description="FireHose Delivery S3 Role")
    fhIAMRole.add_to_policy(
        iam.PolicyStatement(
            # Include the object ARNs as well; s3:PutObject applies to objects,
            # not the bucket itself
            resources=[kda_output_bucket.bucket_arn,
                       f"{kda_output_bucket.bucket_arn}/*"],
            actions=['s3:*']))

    # Create Firehose delivery stream
    fhS3Delivery = fh.CfnDeliveryStream.ExtendedS3DestinationConfigurationProperty(
        bucket_arn=kda_output_bucket.bucket_arn,
        role_arn=fhIAMRole.role_arn)
    deliveryStream = fh.CfnDeliveryStream(
        self, "deliveryStream",
        delivery_stream_name=deliveryStreamName.value_as_string,
        extended_s3_destination_configuration=fhS3Delivery)

    # EC2 instance
    # VPC
    vpc = ec2.Vpc(
        self, "KDA-VPC",
        nat_gateways=0,
        subnet_configuration=[
            ec2.SubnetConfiguration(name="public",
                                    subnet_type=ec2.SubnetType.PUBLIC)
        ])

    # AMI
    amzn_linux = ec2.MachineImage.latest_amazon_linux(
        generation=ec2.AmazonLinuxGeneration.AMAZON_LINUX_2,
        edition=ec2.AmazonLinuxEdition.STANDARD,
        virtualization=ec2.AmazonLinuxVirt.HVM,
        storage=ec2.AmazonLinuxStorage.GENERAL_PURPOSE)

    # Instance role and SSM managed policy
    ec2role = iam.Role(
        self, "InstanceSSM",
        assumed_by=iam.ServicePrincipal("ec2.amazonaws.com"))
    ec2role.add_managed_policy(
        iam.ManagedPolicy.from_aws_managed_policy_name(
            "service-role/AmazonEC2RoleforSSM"))
    ec2role.add_to_policy(
        iam.PolicyStatement(resources=[sourceStream.stream_arn],
                            actions=['kinesis:*']))

    user_data = "#!/bin/bash\n"
    user_data += "echo export KINESIS_STREAM=" + sourceStreamName.value_as_string + " | sudo tee -a /etc/profile\n"
    user_data += "source /etc/profile\n"
    user_data += user_data_file

    # Instance
    instance = ec2.Instance(
        self, "Instance",
        instance_type=ec2.InstanceType("t3.small"),
        machine_image=amzn_linux,
        vpc=vpc,
        role=ec2role,
        user_data=ec2.UserData.custom(user_data))
def _build_firehose_delivery_stream(self, *, stack, vpc_db_instance):
    self.kfh_log_group = logs.LogGroup(
        stack, "exampledeliverystreamloggroup",
        log_group_name="/aws/kinesisfirehose/exampledeliverystream")
    self.kfh_es_log_stream = logs.LogStream(
        stack, "deliverytoeslogstream",
        log_stream_name="deliverytoes",
        log_group=self.kfh_log_group)

    # The CFN-style dict keys must be camelCase; the original mixed casings
    # and defined "ProcessingConfiguration" twice (only the last one took effect).
    self.kfh_instance = kfh.CfnDeliveryStream(
        stack, 'exampledeliverystream',
        delivery_stream_type='DirectPut',
        elasticsearch_destination_configuration={
            "indexName": "webappclickstream",
            "cloudWatchLoggingOptions": {
                "enabled": True,
                "logGroupName": "exampledeliverystream",
                "logStreamName": "deliverytoes"
            },
            "roleArn": self.firehose_role.role_arn,
            "s3Configuration": {
                "bucketArn": self.firehose_bucket.bucket_arn,
                "roleArn": self.firehose_role.role_arn
            },
            "domainArn": self.elastic_search.attr_arn,
            "vpcConfiguration": {
                "roleArn": self.firehose_role.role_arn,
                "securityGroupIds": [self.kfh_security_group.security_group_id],
                "subnetIds": [
                    vpc_db_instance.vpc.select_subnets(
                        subnet_type=ec2.SubnetType.PRIVATE).subnet_ids[0]
                ]
            },
            "bufferingHints": {
                "intervalInSeconds": 60,
                "sizeInMBs": 1
            },
            "processingConfiguration": {
                "enabled": True,
                "processors": [{
                    "parameters": [{
                        "parameterName": "LambdaArn",
                        "parameterValue": self.lambda_transform_fn.function_arn
                    }],
                    "type": "Lambda"
                }]
            }
        })
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    vpc = aws_ec2.Vpc(
        self, "OctemberVPC",
        max_azs=2,
        # subnet_configuration=[{
        #     "cidrMask": 24,
        #     "name": "Public",
        #     "subnetType": aws_ec2.SubnetType.PUBLIC,
        # }, {
        #     "cidrMask": 24,
        #     "name": "Private",
        #     "subnetType": aws_ec2.SubnetType.PRIVATE
        # }, {
        #     "cidrMask": 28,
        #     "name": "Isolated",
        #     "subnetType": aws_ec2.SubnetType.ISOLATED,
        #     "reserved": True
        # }],
        gateway_endpoints={
            "S3": aws_ec2.GatewayVpcEndpointOptions(
                service=aws_ec2.GatewayVpcEndpointAwsService.S3)
        })

    dynamo_db_endpoint = vpc.add_gateway_endpoint(
        "DynamoDbEndpoint",
        service=aws_ec2.GatewayVpcEndpointAwsService.DYNAMODB)

    s3_bucket = s3.Bucket(
        self, "s3bucket",
        bucket_name="octember-bizcard-{region}-{account}".format(
            region=core.Aws.REGION, account=core.Aws.ACCOUNT_ID))

    api = apigw.RestApi(
        self, "BizcardImageUploader",
        rest_api_name="BizcardImageUploader",
        description="This service serves uploading bizcard images into s3.",
        endpoint_types=[apigw.EndpointType.REGIONAL],
        binary_media_types=["image/png", "image/jpg"],
        deploy=True,
        deploy_options=apigw.StageOptions(stage_name="v1"))

    rest_api_role = aws_iam.Role(
        self, "ApiGatewayRoleForS3",
        role_name="ApiGatewayRoleForS3FullAccess",
        assumed_by=aws_iam.ServicePrincipal("apigateway.amazonaws.com"),
        managed_policies=[
            aws_iam.ManagedPolicy.from_aws_managed_policy_name("AmazonS3FullAccess")
        ])

    list_objects_responses = [
        apigw.IntegrationResponse(
            status_code="200",
            #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_apigateway/IntegrationResponse.html#aws_cdk.aws_apigateway.IntegrationResponse.response_parameters
            # The response parameters from the backend response that API Gateway sends to the method response.
            # Use the destination as the key and the source as the value:
            #  - The destination must be an existing response parameter in the MethodResponse property.
            #  - The source must be an existing method request parameter or a static value.
            response_parameters={
                'method.response.header.Timestamp': 'integration.response.header.Date',
                'method.response.header.Content-Length': 'integration.response.header.Content-Length',
                'method.response.header.Content-Type': 'integration.response.header.Content-Type'
            }),
        apigw.IntegrationResponse(status_code="400", selection_pattern=r"4\d{2}"),
        apigw.IntegrationResponse(status_code="500", selection_pattern=r"5\d{2}")
    ]

    list_objects_integration_options = apigw.IntegrationOptions(
        credentials_role=rest_api_role,
        integration_responses=list_objects_responses)

    get_s3_integration = apigw.AwsIntegration(
        service="s3",
        integration_http_method="GET",
        path='/',
        options=list_objects_integration_options)

    api.root.add_method(
        "GET", get_s3_integration,
        authorization_type=apigw.AuthorizationType.IAM,
        api_key_required=False,
        method_responses=[
            apigw.MethodResponse(
                status_code="200",
                response_parameters={
                    'method.response.header.Timestamp': False,
                    'method.response.header.Content-Length': False,
                    'method.response.header.Content-Type': False
                },
                response_models={'application/json': apigw.EmptyModel()}),
            apigw.MethodResponse(status_code="400"),
            apigw.MethodResponse(status_code="500")
        ],
        request_parameters={'method.request.header.Content-Type': False})

    get_s3_folder_integration_options = apigw.IntegrationOptions(
        credentials_role=rest_api_role,
        integration_responses=list_objects_responses,
        #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_apigateway/IntegrationOptions.html#aws_cdk.aws_apigateway.IntegrationOptions.request_parameters
        # Specify request parameters as key-value pairs (string-to-string mappings),
        # with a destination as the key and a source as the value.
        # The source must be an existing method request parameter or a static value.
        request_parameters={"integration.request.path.bucket": "method.request.path.folder"})

    get_s3_folder_integration = apigw.AwsIntegration(
        service="s3",
        integration_http_method="GET",
        path="{bucket}",
        options=get_s3_folder_integration_options)

    s3_folder = api.root.add_resource('{folder}')
    s3_folder.add_method(
        "GET", get_s3_folder_integration,
        authorization_type=apigw.AuthorizationType.IAM,
        api_key_required=False,
        method_responses=[
            apigw.MethodResponse(
                status_code="200",
                response_parameters={
                    'method.response.header.Timestamp': False,
                    'method.response.header.Content-Length': False,
                    'method.response.header.Content-Type': False
                },
                response_models={'application/json': apigw.EmptyModel()}),
            apigw.MethodResponse(status_code="400"),
            apigw.MethodResponse(status_code="500")
        ],
        request_parameters={
            'method.request.header.Content-Type': False,
            'method.request.path.folder': True
        })

    get_s3_item_integration_options = apigw.IntegrationOptions(
        credentials_role=rest_api_role,
        integration_responses=list_objects_responses,
        request_parameters={
            "integration.request.path.bucket": "method.request.path.folder",
            "integration.request.path.object": "method.request.path.item"
        })

    get_s3_item_integration = apigw.AwsIntegration(
        service="s3",
        integration_http_method="GET",
        path="{bucket}/{object}",
        options=get_s3_item_integration_options)

    s3_item = s3_folder.add_resource('{item}')
    s3_item.add_method(
        "GET", get_s3_item_integration,
        authorization_type=apigw.AuthorizationType.IAM,
        api_key_required=False,
        method_responses=[
            apigw.MethodResponse(
                status_code="200",
                response_parameters={
                    'method.response.header.Timestamp': False,
                    'method.response.header.Content-Length': False,
                    'method.response.header.Content-Type': False
                },
                response_models={'application/json': apigw.EmptyModel()}),
            apigw.MethodResponse(status_code="400"),
            apigw.MethodResponse(status_code="500")
        ],
        request_parameters={
            'method.request.header.Content-Type': False,
            'method.request.path.folder': True,
            'method.request.path.item': True
        })

    put_s3_item_integration_options = apigw.IntegrationOptions(
        credentials_role=rest_api_role,
        integration_responses=[
            apigw.IntegrationResponse(status_code="200"),
            apigw.IntegrationResponse(status_code="400", selection_pattern=r"4\d{2}"),
            apigw.IntegrationResponse(status_code="500", selection_pattern=r"5\d{2}")
        ],
        request_parameters={
            "integration.request.header.Content-Type": "method.request.header.Content-Type",
            "integration.request.path.bucket": "method.request.path.folder",
            "integration.request.path.object": "method.request.path.item"
        })

    put_s3_item_integration = apigw.AwsIntegration(
        service="s3",
        integration_http_method="PUT",
        path="{bucket}/{object}",
        options=put_s3_item_integration_options)

    s3_item.add_method(
        "PUT", put_s3_item_integration,
        authorization_type=apigw.AuthorizationType.IAM,
        api_key_required=False,
        method_responses=[
            apigw.MethodResponse(
                status_code="200",
                response_parameters={'method.response.header.Content-Type': False},
                response_models={'application/json': apigw.EmptyModel()}),
            apigw.MethodResponse(status_code="400"),
            apigw.MethodResponse(status_code="500")
        ],
        request_parameters={
            'method.request.header.Content-Type': False,
            'method.request.path.folder': True,
            'method.request.path.item': True
        })

    ddb_table = dynamodb.Table(
        self, "BizcardImageMetaInfoDdbTable",
        table_name="OctemberBizcardImgMeta",
        partition_key=dynamodb.Attribute(name="image_id",
                                         type=dynamodb.AttributeType.STRING),
        billing_mode=dynamodb.BillingMode.PROVISIONED,
        read_capacity=15,
        write_capacity=5)

    img_kinesis_stream = kinesis.Stream(
        self, "BizcardImagePath", stream_name="octember-bizcard-image")

    # Create lambda function
    trigger_textract_lambda_fn = _lambda.Function(
        self, "TriggerTextExtractorFromImage",
        runtime=_lambda.Runtime.PYTHON_3_7,
        function_name="TriggerTextExtractorFromImage",
        handler="trigger_text_extract_from_s3_image.lambda_handler",
        description="Trigger to extract text from an image in S3",
        code=_lambda.Code.asset("./src/main/python/TriggerTextExtractFromS3Image"),
        environment={
            'REGION_NAME': core.Aws.REGION,
            'DDB_TABLE_NAME': ddb_table.table_name,
            'KINESIS_STREAM_NAME': img_kinesis_stream.stream_name
        },
        timeout=core.Duration.minutes(5))

    ddb_table_rw_policy_statement = aws_iam.PolicyStatement(
        effect=aws_iam.Effect.ALLOW,
        resources=[ddb_table.table_arn],
        actions=[
            "dynamodb:BatchGetItem", "dynamodb:Describe*", "dynamodb:List*",
            "dynamodb:GetItem", "dynamodb:Query", "dynamodb:Scan",
            "dynamodb:BatchWriteItem", "dynamodb:DeleteItem", "dynamodb:PutItem",
            "dynamodb:UpdateItem", "dax:Describe*", "dax:List*", "dax:GetItem",
            "dax:BatchGetItem", "dax:Query", "dax:Scan", "dax:BatchWriteItem",
            "dax:DeleteItem", "dax:PutItem", "dax:UpdateItem"
        ])
    trigger_textract_lambda_fn.add_to_role_policy(ddb_table_rw_policy_statement)
    trigger_textract_lambda_fn.add_to_role_policy(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            resources=[img_kinesis_stream.stream_arn],
            actions=["kinesis:Get*", "kinesis:List*", "kinesis:Describe*",
                     "kinesis:PutRecord", "kinesis:PutRecords"]))

    # Assign notification for the s3 event type (ex: OBJECT_CREATED)
    s3_event_filter = s3.NotificationKeyFilter(prefix="bizcard-raw-img/",
                                               suffix=".jpg")
    s3_event_source = S3EventSource(s3_bucket,
                                    events=[s3.EventType.OBJECT_CREATED],
                                    filters=[s3_event_filter])
    trigger_textract_lambda_fn.add_event_source(s3_event_source)

    #XXX: https://github.com/aws/aws-cdk/issues/2240
    # Avoid creating extra Lambda functions with names like LogRetentionaae0aa3c5b4d4f87b02d85b201efdd8a,
    # which happens if log_retention=aws_logs.RetentionDays.THREE_DAYS is added to the constructor props
    log_group = aws_logs.LogGroup(
        self, "TriggerTextractLogGroup",
        log_group_name="/aws/lambda/TriggerTextExtractorFromImage",
        retention=aws_logs.RetentionDays.THREE_DAYS)
    log_group.grant_write(trigger_textract_lambda_fn)

    text_kinesis_stream = kinesis.Stream(
        self, "BizcardTextData", stream_name="octember-bizcard-txt")

    textract_lambda_fn = _lambda.Function(
        self, "GetTextFromImage",
        runtime=_lambda.Runtime.PYTHON_3_7,
        function_name="GetTextFromImage",
        handler="get_text_from_s3_image.lambda_handler",
        description="extract text from an image in S3",
        code=_lambda.Code.asset("./src/main/python/GetTextFromS3Image"),
        environment={
            'REGION_NAME': core.Aws.REGION,
            'DDB_TABLE_NAME': ddb_table.table_name,
            'KINESIS_STREAM_NAME': text_kinesis_stream.stream_name
        },
        timeout=core.Duration.minutes(5))

    textract_lambda_fn.add_to_role_policy(ddb_table_rw_policy_statement)
    textract_lambda_fn.add_to_role_policy(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            resources=[text_kinesis_stream.stream_arn],
            actions=["kinesis:Get*", "kinesis:List*", "kinesis:Describe*",
                     "kinesis:PutRecord", "kinesis:PutRecords"]))
    textract_lambda_fn.add_to_role_policy(
        aws_iam.PolicyStatement(**{
            "effect": aws_iam.Effect.ALLOW,
            "resources": [s3_bucket.bucket_arn, "{}/*".format(s3_bucket.bucket_arn)],
            "actions": [
                "s3:AbortMultipartUpload", "s3:GetBucketLocation", "s3:GetObject",
                "s3:ListBucket", "s3:ListBucketMultipartUploads", "s3:PutObject"
            ]
        }))
    textract_lambda_fn.add_to_role_policy(
        aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                resources=["*"],
                                actions=["textract:*"]))

    img_kinesis_event_source = KinesisEventSource(
        img_kinesis_stream,
        batch_size=100,
        starting_position=_lambda.StartingPosition.LATEST)
    textract_lambda_fn.add_event_source(img_kinesis_event_source)

    log_group = aws_logs.LogGroup(
        self, "GetTextFromImageLogGroup",
        log_group_name="/aws/lambda/GetTextFromImage",
        retention=aws_logs.RetentionDays.THREE_DAYS)
    log_group.grant_write(textract_lambda_fn)

    sg_use_bizcard_es = aws_ec2.SecurityGroup(
        self, "BizcardSearchClientSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for octember bizcard elasticsearch client',
        security_group_name='use-octember-bizcard-es')
    core.Tags.of(sg_use_bizcard_es).add('Name', 'use-octember-bizcard-es')

    sg_bizcard_es = aws_ec2.SecurityGroup(
        self, "BizcardSearchSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for octember bizcard elasticsearch',
        security_group_name='octember-bizcard-es')
    core.Tags.of(sg_bizcard_es).add('Name', 'octember-bizcard-es')

    sg_bizcard_es.add_ingress_rule(peer=sg_bizcard_es,
                                   connection=aws_ec2.Port.all_tcp(),
                                   description='octember-bizcard-es')
    sg_bizcard_es.add_ingress_rule(peer=sg_use_bizcard_es,
                                   connection=aws_ec2.Port.all_tcp(),
                                   description='use-octember-bizcard-es')

    sg_ssh_access = aws_ec2.SecurityGroup(
        self, "BastionHostSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for bastion host',
        security_group_name='octember-bastion-host-sg')
    core.Tags.of(sg_ssh_access).add('Name', 'octember-bastion-host')
    sg_ssh_access.add_ingress_rule(peer=aws_ec2.Peer.any_ipv4(),
                                   connection=aws_ec2.Port.tcp(22),
                                   description='ssh access')

    bastion_host = aws_ec2.BastionHostLinux(
        self, "BastionHost",
        vpc=vpc,
        instance_type=aws_ec2.InstanceType('t3.nano'),
        security_group=sg_ssh_access,
        subnet_selection=aws_ec2.SubnetSelection(subnet_type=aws_ec2.SubnetType.PUBLIC))
    bastion_host.instance.add_security_group(sg_use_bizcard_es)

    #XXX: aws cdk elasticsearch example - https://github.com/aws/aws-cdk/issues/2873
    es_cfn_domain = aws_elasticsearch.CfnDomain(
        self, 'BizcardSearch',
        elasticsearch_cluster_config={
            "dedicatedMasterCount": 3,
            "dedicatedMasterEnabled": True,
            "dedicatedMasterType": "t2.medium.elasticsearch",
            "instanceCount": 2,
            "instanceType": "t2.medium.elasticsearch",
            "zoneAwarenessEnabled": True
        },
        ebs_options={
            "ebsEnabled": True,
            "volumeSize": 10,
            "volumeType": "gp2"
        },
        domain_name="octember-bizcard",
        elasticsearch_version="7.9",
        encryption_at_rest_options={"enabled": False},
        access_policies={
            "Version": "2012-10-17",
            "Statement": [{
                "Effect": "Allow",
                "Principal": {"AWS": "*"},
                "Action": ["es:Describe*", "es:List*", "es:Get*", "es:ESHttp*"],
                "Resource": self.format_arn(service="es", resource="domain",
                                            resource_name="octember-bizcard/*")
            }]
        },
        snapshot_options={"automatedSnapshotStartHour": 17},
        vpc_options={
            "securityGroupIds": [sg_bizcard_es.security_group_id],
            "subnetIds": vpc.select_subnets(
                subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids
        })
    core.Tags.of(es_cfn_domain).add('Name', 'octember-bizcard-es')

    s3_lib_bucket_name = self.node.try_get_context("lib_bucket_name")

    #XXX: https://github.com/aws/aws-cdk/issues/1342
    s3_lib_bucket = s3.Bucket.from_bucket_name(self, id, s3_lib_bucket_name)
    es_lib_layer = _lambda.LayerVersion(
        self, "ESLib",
        layer_version_name="es-lib",
        compatible_runtimes=[_lambda.Runtime.PYTHON_3_7],
        code=_lambda.Code.from_bucket(s3_lib_bucket, "var/octember-es-lib.zip"))
    redis_lib_layer = _lambda.LayerVersion(
        self, "RedisLib",
        layer_version_name="redis-lib",
        compatible_runtimes=[_lambda.Runtime.PYTHON_3_7],
        code=_lambda.Code.from_bucket(s3_lib_bucket, "var/octember-redis-lib.zip"))

    #XXX: Deploy lambda in VPC - https://github.com/aws/aws-cdk/issues/1342
    upsert_to_es_lambda_fn = _lambda.Function(
        self, "UpsertBizcardToES",
        runtime=_lambda.Runtime.PYTHON_3_7,
        function_name="UpsertBizcardToElasticSearch",
        handler="upsert_bizcard_to_es.lambda_handler",
        description="Upsert bizcard text into elasticsearch",
        code=_lambda.Code.asset("./src/main/python/UpsertBizcardToES"),
        environment={
            'ES_HOST': es_cfn_domain.attr_domain_endpoint,
            'ES_INDEX': 'octember_bizcard',
            'ES_TYPE': 'bizcard'
        },
        timeout=core.Duration.minutes(5),
        layers=[es_lib_layer],
        security_groups=[sg_use_bizcard_es],
        vpc=vpc)

    text_kinesis_event_source = KinesisEventSource(
        text_kinesis_stream,
        batch_size=99,
        starting_position=_lambda.StartingPosition.LATEST)
    upsert_to_es_lambda_fn.add_event_source(text_kinesis_event_source)

    log_group = aws_logs.LogGroup(
        self, "UpsertBizcardToESLogGroup",
        log_group_name="/aws/lambda/UpsertBizcardToElasticSearch",
        retention=aws_logs.RetentionDays.THREE_DAYS)
    log_group.grant_write(upsert_to_es_lambda_fn)

    firehose_role_policy_doc = aws_iam.PolicyDocument()
    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(**{
            "effect": aws_iam.Effect.ALLOW,
            "resources": [s3_bucket.bucket_arn, "{}/*".format(s3_bucket.bucket_arn)],
            "actions": [
                "s3:AbortMultipartUpload", "s3:GetBucketLocation", "s3:GetObject",
                "s3:ListBucket", "s3:ListBucketMultipartUploads", "s3:PutObject"
            ]
        }))
    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                resources=["*"],
                                actions=["glue:GetTable", "glue:GetTableVersion",
                                         "glue:GetTableVersions"]))
    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                resources=[text_kinesis_stream.stream_arn],
                                actions=["kinesis:DescribeStream",
                                         "kinesis:GetShardIterator",
                                         "kinesis:GetRecords"]))

    firehose_log_group_name = "/aws/kinesisfirehose/octember-bizcard-txt-to-s3"
    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            #XXX: The ARN will be formatted as follows:
            # arn:{partition}:{service}:{region}:{account}:{resource}{sep}{resource-name}
            resources=[
                self.format_arn(service="logs", resource="log-group",
                                resource_name="{}:log-stream:*".format(firehose_log_group_name),
                                sep=":")
            ],
            actions=["logs:PutLogEvents"]))

    firehose_role = aws_iam.Role(
        self, "FirehoseDeliveryRole",
        role_name="FirehoseDeliveryRole",
        assumed_by=aws_iam.ServicePrincipal("firehose.amazonaws.com"),
        #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
        inline_policies={"firehose_role_policy": firehose_role_policy_doc})

    bizcard_text_to_s3_delivery_stream = aws_kinesisfirehose.CfnDeliveryStream(
        self, "BizcardTextToS3",
        delivery_stream_name="octember-bizcard-txt-to-s3",
        delivery_stream_type="KinesisStreamAsSource",
        kinesis_stream_source_configuration={
            "kinesisStreamArn": text_kinesis_stream.stream_arn,
            "roleArn": firehose_role.role_arn
        },
        extended_s3_destination_configuration={
            "bucketArn": s3_bucket.bucket_arn,
            "bufferingHints": {
                "intervalInSeconds": 60,
                "sizeInMBs": 1
            },
            "cloudWatchLoggingOptions": {
                "enabled": True,
                "logGroupName": firehose_log_group_name,
                "logStreamName": "S3Delivery"
            },
            "compressionFormat": "GZIP",
            "prefix": "bizcard-text/",
            "roleArn": firehose_role.role_arn
        })

    sg_use_bizcard_es_cache = aws_ec2.SecurityGroup(
        self, "BizcardSearchCacheClientSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for octember bizcard search query cache client',
        security_group_name='use-octember-bizcard-es-cache')
    core.Tags.of(sg_use_bizcard_es_cache).add('Name', 'use-octember-bizcard-es-cache')

    sg_bizcard_es_cache = aws_ec2.SecurityGroup(
        self, "BizcardSearchCacheSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for octember bizcard search query cache',
        security_group_name='octember-bizcard-es-cache')
    core.Tags.of(sg_bizcard_es_cache).add('Name', 'octember-bizcard-es-cache')

    sg_bizcard_es_cache.add_ingress_rule(
        peer=sg_use_bizcard_es_cache,
        connection=aws_ec2.Port.tcp(6379),
        description='use-octember-bizcard-es-cache')

    es_query_cache_subnet_group = aws_elasticache.CfnSubnetGroup(
        self, "QueryCacheSubnetGroup",
        description="subnet group for octember-bizcard-es-cache",
        subnet_ids=vpc.select_subnets(
            subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids,
        cache_subnet_group_name='octember-bizcard-es-cache')

    es_query_cache = aws_elasticache.CfnCacheCluster(
        self, "BizcardSearchQueryCache",
        cache_node_type="cache.t3.small",
        num_cache_nodes=1,
        engine="redis",
        engine_version="5.0.5",
        auto_minor_version_upgrade=False,
        cluster_name="octember-bizcard-es-cache",
        snapshot_retention_limit=3,
        snapshot_window="17:00-19:00",
        preferred_maintenance_window="mon:19:00-mon:20:30",
        #XXX: Do not use a reference for "cache_subnet_group_name" - https://github.com/aws/aws-cdk/issues/3098
        #cache_subnet_group_name=es_query_cache_subnet_group.cache_subnet_group_name,  # Redis cluster goes to wrong VPC
        cache_subnet_group_name='octember-bizcard-es-cache',
        vpc_security_group_ids=[sg_bizcard_es_cache.security_group_id])
    #XXX: If you're going to launch your cluster in an Amazon VPC, you need to create a subnet group before
you start creating a cluster. # https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-elasticache-cache-cluster.html#cfn-elasticache-cachecluster-cachesubnetgroupname es_query_cache.add_depends_on(es_query_cache_subnet_group) #XXX: add more than 2 security groups # https://github.com/aws/aws-cdk/blob/ea10f0d141a48819ec0000cd7905feda993870a9/packages/%40aws-cdk/aws-lambda/lib/function.ts#L387 # https://github.com/aws/aws-cdk/issues/1555 # https://github.com/aws/aws-cdk/pull/5049 bizcard_search_lambda_fn = _lambda.Function( self, "BizcardSearchServer", runtime=_lambda.Runtime.PYTHON_3_7, function_name="BizcardSearchProxy", handler="es_search_bizcard.lambda_handler", description="Proxy server to search bizcard text", code=_lambda.Code.asset("./src/main/python/SearchBizcard"), environment={ 'ES_HOST': es_cfn_domain.attr_domain_endpoint, 'ES_INDEX': 'octember_bizcard', 'ES_TYPE': 'bizcard', 'ELASTICACHE_HOST': es_query_cache.attr_redis_endpoint_address }, timeout=core.Duration.minutes(1), layers=[es_lib_layer, redis_lib_layer], security_groups=[sg_use_bizcard_es, sg_use_bizcard_es_cache], vpc=vpc) #XXX: create API Gateway + LambdaProxy search_api = apigw.LambdaRestApi( self, "BizcardSearchAPI", handler=bizcard_search_lambda_fn, proxy=False, rest_api_name="BizcardSearch", description="This service serves searching bizcard text.", endpoint_types=[apigw.EndpointType.REGIONAL], deploy=True, deploy_options=apigw.StageOptions(stage_name="v1")) bizcard_search = search_api.root.add_resource('search') bizcard_search.add_method( "GET", method_responses=[ apigw.MethodResponse( status_code="200", response_models={'application/json': apigw.EmptyModel()}), apigw.MethodResponse(status_code="400"), apigw.MethodResponse(status_code="500") ]) sg_use_bizcard_graph_db = aws_ec2.SecurityGroup( self, "BizcardGraphDbClientSG", vpc=vpc, allow_all_outbound=True, description='security group for octember bizcard graph db client', security_group_name='use-octember-bizcard-neptune') core.Tags.of(sg_use_bizcard_graph_db).add( 'Name', 'use-octember-bizcard-neptune') sg_bizcard_graph_db = aws_ec2.SecurityGroup( self, "BizcardGraphDbSG", vpc=vpc, allow_all_outbound=True, description='security group for octember bizcard graph db', security_group_name='octember-bizcard-neptune') core.Tags.of(sg_bizcard_graph_db).add('Name', 'octember-bizcard-neptune') sg_bizcard_graph_db.add_ingress_rule( peer=sg_bizcard_graph_db, connection=aws_ec2.Port.tcp(8182), description='octember-bizcard-neptune') sg_bizcard_graph_db.add_ingress_rule( peer=sg_use_bizcard_graph_db, connection=aws_ec2.Port.tcp(8182), description='use-octember-bizcard-neptune') bizcard_graph_db_subnet_group = aws_neptune.CfnDBSubnetGroup( self, "NeptuneSubnetGroup", db_subnet_group_description= "subnet group for octember-bizcard-neptune", subnet_ids=vpc.select_subnets( subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids, db_subnet_group_name='octember-bizcard-neptune') bizcard_graph_db = aws_neptune.CfnDBCluster( self, "BizcardGraphDB", availability_zones=vpc.availability_zones, db_subnet_group_name=bizcard_graph_db_subnet_group. 
db_subnet_group_name, db_cluster_identifier="octember-bizcard", backup_retention_period=1, preferred_backup_window="08:45-09:15", preferred_maintenance_window="sun:18:00-sun:18:30", vpc_security_group_ids=[sg_bizcard_graph_db.security_group_id]) bizcard_graph_db.add_depends_on(bizcard_graph_db_subnet_group) bizcard_graph_db_instance = aws_neptune.CfnDBInstance( self, "BizcardGraphDBInstance", db_instance_class="db.r5.large", allow_major_version_upgrade=False, auto_minor_version_upgrade=False, availability_zone=vpc.availability_zones[0], db_cluster_identifier=bizcard_graph_db.db_cluster_identifier, db_instance_identifier="octember-bizcard", preferred_maintenance_window="sun:18:00-sun:18:30") bizcard_graph_db_instance.add_depends_on(bizcard_graph_db) bizcard_graph_db_replica_instance = aws_neptune.CfnDBInstance( self, "BizcardGraphDBReplicaInstance", db_instance_class="db.r5.large", allow_major_version_upgrade=False, auto_minor_version_upgrade=False, availability_zone=vpc.availability_zones[-1], db_cluster_identifier=bizcard_graph_db.db_cluster_identifier, db_instance_identifier="octember-bizcard-replica", preferred_maintenance_window="sun:18:00-sun:18:30") bizcard_graph_db_replica_instance.add_depends_on(bizcard_graph_db) bizcard_graph_db_replica_instance.add_depends_on( bizcard_graph_db_instance) gremlinpython_lib_layer = _lambda.LayerVersion( self, "GremlinPythonLib", layer_version_name="gremlinpython-lib", compatible_runtimes=[_lambda.Runtime.PYTHON_3_7], code=_lambda.Code.from_bucket( s3_lib_bucket, "var/octember-gremlinpython-lib.zip")) #XXX: https://github.com/aws/aws-cdk/issues/1342 upsert_to_neptune_lambda_fn = _lambda.Function( self, "UpsertBizcardToGraphDB", runtime=_lambda.Runtime.PYTHON_3_7, function_name="UpsertBizcardToNeptune", handler="upsert_bizcard_to_graph_db.lambda_handler", description="Upsert bizcard into neptune", code=_lambda.Code.asset( "./src/main/python/UpsertBizcardToGraphDB"), environment={ 'REGION_NAME': core.Aws.REGION, 'NEPTUNE_ENDPOINT': bizcard_graph_db.attr_endpoint, 'NEPTUNE_PORT': bizcard_graph_db.attr_port }, timeout=core.Duration.minutes(5), layers=[gremlinpython_lib_layer], security_groups=[sg_use_bizcard_graph_db], vpc=vpc) upsert_to_neptune_lambda_fn.add_event_source(text_kinesis_event_source) log_group = aws_logs.LogGroup( self, "UpsertBizcardToGraphDBLogGroup", log_group_name="/aws/lambda/UpsertBizcardToNeptune", retention=aws_logs.RetentionDays.THREE_DAYS) log_group.grant_write(upsert_to_neptune_lambda_fn) sg_use_bizcard_neptune_cache = aws_ec2.SecurityGroup( self, "BizcardNeptuneCacheClientSG", vpc=vpc, allow_all_outbound=True, description= 'security group for octember bizcard recommendation query cache client', security_group_name='use-octember-bizcard-neptune-cache') core.Tags.of(sg_use_bizcard_neptune_cache).add( 'Name', 'use-octember-bizcard-es-cache') sg_bizcard_neptune_cache = aws_ec2.SecurityGroup( self, "BizcardNeptuneCacheSG", vpc=vpc, allow_all_outbound=True, description= 'security group for octember bizcard recommendation query cache', security_group_name='octember-bizcard-neptune-cache') core.Tags.of(sg_bizcard_neptune_cache).add( 'Name', 'octember-bizcard-neptune-cache') sg_bizcard_neptune_cache.add_ingress_rule( peer=sg_use_bizcard_neptune_cache, connection=aws_ec2.Port.tcp(6379), description='use-octember-bizcard-neptune-cache') recomm_query_cache_subnet_group = aws_elasticache.CfnSubnetGroup( self, "RecommQueryCacheSubnetGroup", description="subnet group for octember-bizcard-neptune-cache", subnet_ids=vpc.select_subnets( 
subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids, cache_subnet_group_name='octember-bizcard-neptune-cache') recomm_query_cache = aws_elasticache.CfnCacheCluster( self, "BizcardRecommQueryCache", cache_node_type="cache.t3.small", num_cache_nodes=1, engine="redis", engine_version="5.0.5", auto_minor_version_upgrade=False, cluster_name="octember-bizcard-neptune-cache", snapshot_retention_limit=3, snapshot_window="17:00-19:00", preferred_maintenance_window="mon:19:00-mon:20:30", #XXX: Do not use referece for "cache_subnet_group_name" - https://github.com/aws/aws-cdk/issues/3098 #cache_subnet_group_name=recomm_query_cache_subnet_group.cache_subnet_group_name, # Redis cluster goes to wrong VPC cache_subnet_group_name='octember-bizcard-neptune-cache', vpc_security_group_ids=[ sg_bizcard_neptune_cache.security_group_id ]) recomm_query_cache.add_depends_on(recomm_query_cache_subnet_group) bizcard_recomm_lambda_fn = _lambda.Function( self, "BizcardRecommender", runtime=_lambda.Runtime.PYTHON_3_7, function_name="BizcardRecommender", handler="neptune_recommend_bizcard.lambda_handler", description="This service serves PYMK(People You May Know).", code=_lambda.Code.asset("./src/main/python/RecommendBizcard"), environment={ 'REGION_NAME': core.Aws.REGION, 'NEPTUNE_ENDPOINT': bizcard_graph_db.attr_read_endpoint, 'NEPTUNE_PORT': bizcard_graph_db.attr_port, 'ELASTICACHE_HOST': recomm_query_cache.attr_redis_endpoint_address }, timeout=core.Duration.minutes(1), layers=[gremlinpython_lib_layer, redis_lib_layer], security_groups=[ sg_use_bizcard_graph_db, sg_use_bizcard_neptune_cache ], vpc=vpc) #XXX: create API Gateway + LambdaProxy recomm_api = apigw.LambdaRestApi( self, "BizcardRecommendAPI", handler=bizcard_recomm_lambda_fn, proxy=False, rest_api_name="BizcardRecommend", description="This service serves PYMK(People You May Know).", endpoint_types=[apigw.EndpointType.REGIONAL], deploy=True, deploy_options=apigw.StageOptions(stage_name="v1")) bizcard_recomm = recomm_api.root.add_resource('pymk') bizcard_recomm.add_method( "GET", method_responses=[ apigw.MethodResponse( status_code="200", response_models={'application/json': apigw.EmptyModel()}), apigw.MethodResponse(status_code="400"), apigw.MethodResponse(status_code="500") ]) sagemaker_notebook_role_policy_doc = aws_iam.PolicyDocument() sagemaker_notebook_role_policy_doc.add_statements( aws_iam.PolicyStatement( **{ "effect": aws_iam.Effect.ALLOW, "resources": [ "arn:aws:s3:::aws-neptune-notebook", "arn:aws:s3:::aws-neptune-notebook/*" ], "actions": ["s3:GetObject", "s3:ListBucket"] })) sagemaker_notebook_role_policy_doc.add_statements( aws_iam.PolicyStatement( **{ "effect": aws_iam.Effect.ALLOW, "resources": [ "arn:aws:neptune-db:{region}:{account}:{cluster_id}/*". format(region=core.Aws.REGION, account=core.Aws.ACCOUNT_ID, cluster_id=bizcard_graph_db. 
attr_cluster_resource_id)
                ],
                "actions": ["neptune-db:connect"]
            }))

        sagemaker_notebook_role = aws_iam.Role(
            self,
            'SageMakerNotebookForNeptuneWorkbenchRole',
            role_name='AWSNeptuneNotebookRole-OctemberBizcard',
            assumed_by=aws_iam.ServicePrincipal('sagemaker.amazonaws.com'),
            #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
            inline_policies={
                'AWSNeptuneNotebook': sagemaker_notebook_role_policy_doc
            })

        neptune_wb_lifecycle_content = '''#!/bin/bash
sudo -u ec2-user -i <<'EOF'
echo "export GRAPH_NOTEBOOK_AUTH_MODE=DEFAULT" >> ~/.bashrc
echo "export GRAPH_NOTEBOOK_HOST={NeptuneClusterEndpoint}" >> ~/.bashrc
echo "export GRAPH_NOTEBOOK_PORT={NeptuneClusterPort}" >> ~/.bashrc
echo "export NEPTUNE_LOAD_FROM_S3_ROLE_ARN=''" >> ~/.bashrc
echo "export AWS_REGION={AWS_Region}" >> ~/.bashrc
aws s3 cp s3://aws-neptune-notebook/graph_notebook.tar.gz /tmp/graph_notebook.tar.gz
rm -rf /tmp/graph_notebook
tar -zxvf /tmp/graph_notebook.tar.gz -C /tmp
/tmp/graph_notebook/install.sh
EOF
'''.format(NeptuneClusterEndpoint=bizcard_graph_db.attr_endpoint,
           NeptuneClusterPort=bizcard_graph_db.attr_port,
           AWS_Region=core.Aws.REGION)

        neptune_wb_lifecycle_config_prop = aws_sagemaker.CfnNotebookInstanceLifecycleConfig.NotebookInstanceLifecycleHookProperty(
            content=core.Fn.base64(neptune_wb_lifecycle_content))

        neptune_wb_lifecycle_config = aws_sagemaker.CfnNotebookInstanceLifecycleConfig(
            self,
            'NeptuneWorkbenchLifeCycleConfig',
            notebook_instance_lifecycle_config_name='AWSNeptuneWorkbenchOctemberBizcardLCConfig',
            on_start=[neptune_wb_lifecycle_config_prop])

        neptune_workbench = aws_sagemaker.CfnNotebookInstance(
            self,
            'NeptuneWorkbench',
            instance_type='ml.t2.medium',
            role_arn=sagemaker_notebook_role.role_arn,
            lifecycle_config_name=neptune_wb_lifecycle_config.notebook_instance_lifecycle_config_name,
            notebook_instance_name='OctemberBizcard-NeptuneWorkbench',
            root_access='Disabled',
            # security_group_ids expects security group IDs, not names
            security_group_ids=[sg_use_bizcard_graph_db.security_group_id],
            subnet_id=bizcard_graph_db_subnet_group.subnet_ids[0])
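# --- Illustrative sketch (not part of the stack above): a minimal smoke test
# for the image pipeline. It uploads a .jpg under the exact prefix/suffix that
# the S3 notification on TriggerTextExtractorFromImage filters on. The bucket
# name argument is a placeholder; the bucket itself is created elsewhere in
# this stack.
import boto3

def upload_test_bizcard(bucket_name: str, local_path: str) -> None:
    s3_client = boto3.client("s3")
    # The notification filter requires prefix 'bizcard-raw-img/' and suffix '.jpg'.
    key = "bizcard-raw-img/" + local_path.rsplit("/", 1)[-1]
    with open(local_path, "rb") as image_file:
        s3_client.put_object(Bucket=bucket_name,
                             Key=key,
                             Body=image_file,
                             ContentType="image/jpeg")

# Example: upload_test_bizcard("my-octember-bucket", "./sample-card.jpg")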
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None: super().__init__(scope, id, **kwargs) # The code that defines your stack goes here table = aws_dynamodb.Table(self, "DashboardModel", partition_key=aws_dynamodb.Attribute(name="Pk", type=aws_dynamodb.AttributeType.STRING), sort_key=aws_dynamodb.Attribute(name="Sk", type=aws_dynamodb.AttributeType.STRING), billing_mode=aws_dynamodb.BillingMode.PAY_PER_REQUEST) kds_input_stream = aws_kinesis.Stream(self, "kds_dashboard_input_stream", shard_count=1, stream_name="kds_dashboard_input_stream") kds_output_stream = aws_kinesis.Stream(self, "kds_dashboard_output_stream", shard_count=1, stream_name="kds_dashboard_output_stream") # Creating a ingest bucket for this stack ingest_bucket = aws_s3.Bucket(self,'dreis_dboard_ingest_bucket') kfh_service_role = aws_iam.Role(self, 'KFH_Dashboard_Role', assumed_by=aws_iam.ServicePrincipal('firehose.amazonaws.com') ) kfh_policy_stmt = aws_iam.PolicyStatement( actions=["*"], resources=["*"] ) kfh_service_role.add_to_policy(kfh_policy_stmt) #Creating firehose for this stack kfh_source = aws_kinesisfirehose.CfnDeliveryStream.KinesisStreamSourceConfigurationProperty( kinesis_stream_arn=kds_input_stream.stream_arn, role_arn=kfh_service_role.role_arn ) kfh_datalake = aws_kinesisfirehose.CfnDeliveryStream(self, "kfh_datalake", s3_destination_configuration=aws_kinesisfirehose.CfnDeliveryStream.S3DestinationConfigurationProperty( bucket_arn=ingest_bucket.bucket_arn, buffering_hints=aws_kinesisfirehose.CfnDeliveryStream.BufferingHintsProperty( interval_in_seconds=60, size_in_m_bs=5), compression_format="UNCOMPRESSED", role_arn=kfh_service_role.role_arn ), delivery_stream_type="KinesisStreamAsSource", kinesis_stream_source_configuration=kfh_source ) kda_service_role = aws_iam.Role(self, 'KDA_Dashboard_Role', assumed_by=aws_iam.ServicePrincipal('kinesisanalytics.amazonaws.com') ) kda_policy_stmt = aws_iam.PolicyStatement( actions=["*"], resources=["*"] ) kda_service_role.add_to_policy(kda_policy_stmt) # KA doesn't like - (dash) in names col1 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty( name="state", sql_type="VARCHAR(2)", mapping="$.state" ) col2 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty( name="event_time", sql_type="TIMESTAMP", mapping="$.event-time" ) col3 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty( name="region", sql_type="VARCHAR(12)", mapping="$.region" ) col4 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty( name="store_id", sql_type="INTEGER", mapping="$.store-id" ) col5 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty( name="kpi_1", sql_type="INTEGER", mapping="$.kpi-1" ) col6 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty( name="kpi_2", sql_type="INTEGER", mapping="$.kpi-2" ) col7 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty( name="kpi_3", sql_type="INTEGER", mapping="$.kpi-3" ) col8 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty( name="kpi_4", sql_type="INTEGER", mapping="$.kpi-4" ) col9 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty( name="kpi_5", sql_type="INTEGER", mapping="$.kpi-5" ) schema = aws_kinesisanalytics.CfnApplication.InputSchemaProperty( record_columns=[col2, col1, col3, col4, col5, col6, col7, col8, col9], record_encoding="UTF-8", record_format=aws_kinesisanalytics.CfnApplication.RecordFormatProperty( record_format_type="JSON", mapping_parameters=aws_kinesisanalytics.CfnApplication.MappingParametersProperty( 
json_mapping_parameters=aws_kinesisanalytics.CfnApplication.JSONMappingParametersProperty(
                    record_row_path="$"
                )
            )
        )
    )

        kda_is = aws_kinesisanalytics.CfnApplication.KinesisStreamsInputProperty(
            resource_arn=kds_input_stream.stream_arn,
            role_arn=kda_service_role.role_arn
        )

        ip = aws_kinesisanalytics.CfnApplication.InputProperty(
            name_prefix="SOURCE_SQL_STREAM",
            input_schema=schema,
            kinesis_streams_input=kda_is
        )

        # Note: the statements must be separated by real whitespace (string
        # concatenation without it yields e.g. 'ingest_timeFROM'), and each
        # pump needs a unique name: "CREATE OR REPLACE PUMP" with the same
        # name would replace the previous pump, leaving only the last one.
        application_code = """
CREATE OR REPLACE STREAM "DESTINATION_SQL_STREAM_BY_STORE" ("region" VARCHAR(10), "state" VARCHAR(2), "store-id" INTEGER, kpi_1_sum INTEGER, kpi_2_sum INTEGER, ingest_time TIMESTAMP);
CREATE OR REPLACE STREAM "DESTINATION_SQL_STREAM_BY_STATE" ("region" VARCHAR(10), "state" VARCHAR(2), kpi_1_sum INTEGER, kpi_2_sum INTEGER, ingest_time TIMESTAMP);
CREATE OR REPLACE STREAM "DESTINATION_SQL_STREAM_BY_REGION" ("region" VARCHAR(10), kpi_1_sum INTEGER, kpi_2_sum INTEGER, ingest_time TIMESTAMP);
CREATE OR REPLACE PUMP "STREAM_PUMP_BY_STORE" AS INSERT INTO "DESTINATION_SQL_STREAM_BY_STORE"
SELECT STREAM "region", "state", "store-id", SUM("kpi-1") AS kpi_1_sum, SUM("kpi-2") AS kpi_2_sum, FLOOR("SOURCE_SQL_STREAM_001".APPROXIMATE_ARRIVAL_TIME TO MINUTE) AS ingest_time
FROM "SOURCE_SQL_STREAM_001"
GROUP BY "region", "state", "store-id", FLOOR("SOURCE_SQL_STREAM_001".APPROXIMATE_ARRIVAL_TIME TO MINUTE), FLOOR(("SOURCE_SQL_STREAM_001".ROWTIME - TIMESTAMP '1970-01-01 00:00:00') SECOND / 10 TO SECOND);
CREATE OR REPLACE PUMP "STREAM_PUMP_BY_STATE" AS INSERT INTO "DESTINATION_SQL_STREAM_BY_STATE"
SELECT STREAM "region", "state", SUM("kpi-1") AS kpi_1_sum, SUM("kpi-2") AS kpi_2_sum, FLOOR("SOURCE_SQL_STREAM_001".APPROXIMATE_ARRIVAL_TIME TO MINUTE) AS ingest_time
FROM "SOURCE_SQL_STREAM_001"
GROUP BY "region", "state", FLOOR("SOURCE_SQL_STREAM_001".APPROXIMATE_ARRIVAL_TIME TO MINUTE), FLOOR(("SOURCE_SQL_STREAM_001".ROWTIME - TIMESTAMP '1970-01-01 00:00:00') SECOND / 10 TO SECOND);
CREATE OR REPLACE PUMP "STREAM_PUMP_BY_REGION" AS INSERT INTO "DESTINATION_SQL_STREAM_BY_REGION"
SELECT STREAM "region", SUM("kpi-1") AS kpi_1_sum, SUM("kpi-2") AS kpi_2_sum, FLOOR("SOURCE_SQL_STREAM_001".APPROXIMATE_ARRIVAL_TIME TO MINUTE) AS ingest_time
FROM "SOURCE_SQL_STREAM_001"
GROUP BY "region", FLOOR("SOURCE_SQL_STREAM_001".APPROXIMATE_ARRIVAL_TIME TO MINUTE), FLOOR(("SOURCE_SQL_STREAM_001".ROWTIME - TIMESTAMP '1970-01-01 00:00:00') SECOND / 10 TO SECOND);
"""

        kda_app = aws_kinesisanalytics.CfnApplication(
            self, "kda_agg",
            inputs=[ip],  # kda_inputs
            application_code=application_code,
            application_description="Aggregating data",
            application_name="DashboardMetricsAggregator"
        )

        kda_output_prop = aws_kinesisanalytics.CfnApplicationOutput.KinesisStreamsOutputProperty(
            resource_arn=kds_output_stream.stream_arn,
            role_arn=kda_service_role.role_arn
        )

        kda_dest_schema = aws_kinesisanalytics.CfnApplicationOutput.DestinationSchemaProperty(
            record_format_type="JSON"
        )

        kda_output_prop_by_store = aws_kinesisanalytics.CfnApplicationOutput.OutputProperty(
            destination_schema=kda_dest_schema,
            kinesis_streams_output=kda_output_prop,
            name="DESTINATION_SQL_STREAM_BY_STORE"
        )

        kda_output_prop_by_state = aws_kinesisanalytics.CfnApplicationOutput.OutputProperty(
            destination_schema=kda_dest_schema,
            kinesis_streams_output=kda_output_prop,
            name="DESTINATION_SQL_STREAM_BY_STATE"
        )

        kda_output_prop_by_region = aws_kinesisanalytics.CfnApplicationOutput.OutputProperty(
destination_schema=kda_dest_schema, kinesis_streams_output=kda_output_prop, name="DESTINATION_SQL_STREAM_BY_REGION" ) kda_app_output_prop = aws_kinesisanalytics.CfnApplicationOutput(self, "kda_agg_output_store", application_name="DashboardMetricsAggregator", output=kda_output_prop_by_store ) kda_app_output_prop = aws_kinesisanalytics.CfnApplicationOutput(self, "kda_agg_output_state", application_name="DashboardMetricsAggregator", output=kda_output_prop_by_state ) kda_app_output_prop = aws_kinesisanalytics.CfnApplicationOutput(self, "kda_agg_output_region", application_name="DashboardMetricsAggregator", output=kda_output_prop_by_region ) lambda_agg_function = aws_lambda.Function(self, "AggDataLambda", runtime=aws_lambda.Runtime.PYTHON_3_7, handler="lambda_function.lambda_handler", code=aws_lambda.Code.asset("../models/dashboard/lambdas/aggregate_data_lambda"), timeout=Duration.minutes(5)) lambda_agg_function.add_environment("DDB_TABLE_DASHBOARD", table.table_name) lambda_agg_function.add_to_role_policy(aws_iam.PolicyStatement( effect=aws_iam.Effect.ALLOW, actions=[ "kinesis:*" ], resources=["*"] )) table.grant_read_write_data(lambda_agg_function) kes = aws_lambda_event_sources.KinesisEventSource(kds_output_stream, starting_position=aws_lambda.StartingPosition.TRIM_HORIZON, batch_size=50, #max_batching_window=100 ) lambda_agg_function.add_event_source(kes) core.CfnOutput( self, "TableName_Dashboard", description="Table name for Dashboard", value=table.table_name ) core.CfnOutput( self, "BucketName_Dashboard", description="Bucket name", value=ingest_bucket.bucket_arn ) core.CfnOutput( self, "KinesisInputStream_Dashboard", description="Kinesis input for Dashboard", value=kds_input_stream.stream_name ) core.CfnOutput( self, "KinesisOutputStream_Dashboard", description="Kinesis output for Dashboard", value=kds_output_stream.stream_name )
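# --- Illustrative sketch (not part of the stack above): a producer that feeds
# kds_dashboard_input_stream with events shaped like the RecordColumnProperty
# mappings ($.region, $.state, $.store-id, $.event-time, $.kpi-1 ... $.kpi-5),
# so the Kinesis Analytics application has something to aggregate. All values
# here are made up.
import datetime
import json
import random

import boto3

def put_sample_kpi_event(stream_name: str = "kds_dashboard_input_stream") -> None:
    kinesis_client = boto3.client("kinesis")
    event = {
        "region": random.choice(["northeast", "southwest", "midwest"]),
        "state": random.choice(["NY", "CA", "OH"]),
        "store-id": random.randint(1, 100),
        "event-time": datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
        "kpi-1": random.randint(0, 10),
        "kpi-2": random.randint(0, 10),
        "kpi-3": random.randint(0, 10),
        "kpi-4": random.randint(0, 10),
        "kpi-5": random.randint(0, 10),
    }
    kinesis_client.put_record(StreamName=stream_name,
                              Data=json.dumps(event).encode("utf-8"),
                              PartitionKey=str(event["store-id"]))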
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None: super().__init__(scope, id, **kwargs) API_ARN = self.node.try_get_context("api_arn") RATE = self.node.try_get_context("rate") if not API_ARN or not RATE: logger.error( f"Required context variables for {id} were not provided!") else: # Create the WAF IPSets doslist = wafv2.CfnIPSet( self, "Ext06DosIpSet", addresses=[], ip_address_version="IPV4", scope="REGIONAL", name="Ext06DosIpSet", ) suslist = wafv2.CfnIPSet( self, "Ext06SusIpSet", addresses=[], ip_address_version="IPV4", scope="REGIONAL", name="Ext06SusIpSet", ) # Create a WAF waf = wafv2.CfnWebACL( self, id="Ext06_WAF", name="Ext06-WAF", default_action=wafv2.CfnWebACL.DefaultActionProperty(allow={}), scope="REGIONAL", visibility_config=wafv2.CfnWebACL.VisibilityConfigProperty( cloud_watch_metrics_enabled=True, metric_name="EXT06_WAF", sampled_requests_enabled=True), rules=[], ) # Create Susunban lambda lambda_dir_path = os.path.join(os.getcwd(), "ir_cdk_stacks", "ext_06") susunban_lambda = _lambda.Function( self, "Ext06ResponseSusUnbanFunction", runtime=_lambda.Runtime.PYTHON_3_8, handler="susunban_lambda.lambda_handler", code=_lambda.Code.from_asset(lambda_dir_path), environment={ "ipset_id": suslist.attr_id, "ipset_name": suslist.name, "ipset_scope": suslist.scope, }) # Assign WAF permissions to lambda susunban_lambda.add_to_role_policy( iam.PolicyStatement( actions=["wafv2:GetIPSet", "wafv2:UpdateIPSet"], effect=iam.Effect.ALLOW, resources=[suslist.attr_arn], )) # Create Dosunban lambda lambda_dir_path = os.path.join(os.getcwd(), "ir_cdk_stacks", "ext_06") dosunban_lambda = _lambda.Function( self, "Ext06ResponseDosUnbanFunction", runtime=_lambda.Runtime.PYTHON_3_8, handler="dosunban_lambda.lambda_handler", code=_lambda.Code.from_asset(lambda_dir_path), environment={ "ipset_id": doslist.attr_id, "ipset_name": doslist.name, "ipset_scope": doslist.scope, }) # Assign WAF permissions to lambda dosunban_lambda.add_to_role_policy( iam.PolicyStatement( actions=["wafv2:GetIPSet", "wafv2:UpdateIPSet"], effect=iam.Effect.ALLOW, resources=[doslist.attr_arn], )) # Create dos stepfunction # Define a second state machine to unban the blacklisted IP after 1 hour doswait_step = sfn.Wait( self, "Ext06ResponseStepDosWait", time=sfn.WaitTime.duration(core.Duration.hours(1)), ) suswait_step = sfn.Wait( self, "Ext06ResponseStepSusWait", time=sfn.WaitTime.duration(core.Duration.hours(1)), ) dosunban_step = sfn.Task( self, "Ext06ResponseStepDosUnban", task=tasks.RunLambdaTask( dosunban_lambda, integration_pattern=sfn.ServiceIntegrationPattern. FIRE_AND_FORGET, payload={"Input.$": "$"}, ), ) susunban_step = sfn.Task( self, "Ext06ResponseStepSosUnban", task=tasks.RunLambdaTask( susunban_lambda, integration_pattern=sfn.ServiceIntegrationPattern. 
FIRE_AND_FORGET, payload={"Input.$": "$"}, ), ) dos_statemachine = sfn.StateMachine( self, "Ext06ResponseDosUnbanStateMachine", definition=doswait_step.next(dosunban_step), timeout=core.Duration.hours(1.5), ) sus_statemachine = sfn.StateMachine( self, "Ext06ResponseSusUnbanStateMachine", definition=suswait_step.next(susunban_step), timeout=core.Duration.hours(1.5), ) # Create lambda function lambda_func = _lambda.Function( self, "Ext06ResponseFunction", runtime=_lambda.Runtime.PYTHON_3_8, handler="response_lambda.lambda_handler", code=_lambda.Code.from_asset(lambda_dir_path), environment={ "suslist_id": suslist.attr_id, "suslist_name": suslist.name, "suslist_scope": suslist.scope, "doslist_id": doslist.attr_id, "doslist_name": doslist.name, "doslist_scope": doslist.scope, "dos_arn": dos_statemachine.state_machine_arn, "sus_arn": sus_statemachine.state_machine_arn, }, ) kinesis_log = s3.Bucket( self, id='dos_logs', access_control=s3.BucketAccessControl.PUBLIC_READ_WRITE, ) # Assign permissions to response lambda lambda_func.add_to_role_policy( iam.PolicyStatement( actions=[ "wafv2:GetIPSet", "wafv2:UpdateIPSet", "states:StartExecution", "s3:GetObject", ], effect=iam.Effect.ALLOW, resources=[ doslist.attr_arn, suslist.attr_arn, sus_statemachine.state_machine_arn, dos_statemachine.state_machine_arn, kinesis_log.bucket_arn, kinesis_log.bucket_arn, kinesis_log.bucket_arn + "/*" ], )) # Create an IAM role for the steram stream_role = iam.Role( self, id="waf-kinesis-log-role", assumed_by=iam.ServicePrincipal( service="firehose.amazonaws.com", ), ) stream_permissions = iam.Policy( self, id="Ext-06-kinesis-permissions", statements=[ iam.PolicyStatement( actions=[ "s3:AbortMultipartUpload", "s3:GetBucketLocation", "s3:GetObject", "s3:ListBucket", "s3:ListBucketMultipartUploads", "s3:PutObject", ], effect=iam.Effect.ALLOW, resources=[ kinesis_log.bucket_arn, kinesis_log.bucket_arn + "/*" ], ) ]) stream_role.attach_inline_policy(stream_permissions) log_stream = firehose.CfnDeliveryStream( self, id="aws-waf-logs-ext06", delivery_stream_type="DirectPut", delivery_stream_name="aws-waf-logs-ext06", s3_destination_configuration=firehose.CfnDeliveryStream. S3DestinationConfigurationProperty( bucket_arn=kinesis_log.bucket_arn, buffering_hints=firehose.CfnDeliveryStream. BufferingHintsProperty(interval_in_seconds=300, size_in_m_bs=5), compression_format="UNCOMPRESSED", role_arn=stream_role.role_arn), ) kinesis_log.add_event_notification( s3.EventType.OBJECT_CREATED, dest=s3_notifications.LambdaDestination(lambda_func)) utc_time = datetime.now(tz=timezone.utc) utc_time = utc_time + timedelta(minutes=5) cron_string = "cron(" + str(utc_time.minute) + " " + str( utc_time.hour) + " " + str(utc_time.day) + " " + str( utc_time.month) + " ? 
" + str(utc_time.year) + ")" trigger = events.Rule( self, id="ext-06 setup", rule_name="Ext06-trigger", schedule=events.Schedule.expression(cron_string)) setup_dir_path = os.path.join(os.getcwd(), "ir_cdk_stacks", "ext_06") setup_func = _lambda.Function( self, id="Ext06Setup", runtime=_lambda.Runtime.PYTHON_3_8, handler="setup.lambda_handler", code=_lambda.Code.from_asset(setup_dir_path), environment={ "waf_arn": waf.attr_arn, "waf_id": waf.attr_id, "waf_scope": waf.scope, "waf_name": waf.name, "firehose_arn": log_stream.attr_arn, "rule_name": "Ext06-trigger", "doslist_arn": doslist.attr_arn, "rate": str(RATE), }, ) # Assign permissions to setup lambda setup_func.add_to_role_policy( iam.PolicyStatement( actions=[ "wafv2:PutLoggingConfiguration", "wafv2:GetWebACL", "wafv2:UpdateWebACL" ], effect=iam.Effect.ALLOW, resources=[waf.attr_arn, doslist.attr_arn], )) setup = targets.LambdaFunction(handler=setup_func, ) setup.bind(rule=trigger) trigger.add_target(target=setup) wafv2.CfnWebACLAssociation( self, id="API gateway association", resource_arn=API_ARN, web_acl_arn=waf.attr_arn, )
def __init__( self, scope: core.Construct, construct_id: str, **kwargs, ) -> None: super().__init__(scope, construct_id, **kwargs) # Get some context properties log_level = self.node.try_get_context("log_level") api_name = self.node.try_get_context("api_name") stage_name = self.node.try_get_context("stage_name") endpoint_filter = self.node.try_get_context("endpoint_filter") api_lambda_memory = self.node.try_get_context("api_lambda_memory") api_lambda_timeout = self.node.try_get_context("api_lambda_timeout") metrics_lambda_memory = self.node.try_get_context("metrics_lambda_memory") metrics_lambda_timeout = self.node.try_get_context("metrics_lambda_timeout") dynamodb_read_capacity = self.node.try_get_context("dynamodb_read_capacity") dynamodb_write_capacity = self.node.try_get_context("dynamodb_write_capacity") delivery_sync = self.node.try_get_context("delivery_sync") firehose_interval = self.node.try_get_context("firehose_interval") firehose_mb_size = self.node.try_get_context("firehose_mb_size") # Create dynamodb tables and kinesis stream per project assignment_table_name = f"{api_name}-assignment-{stage_name}" metrics_table_name = f"{api_name}-metrics-{stage_name}" delivery_stream_name = f"{api_name}-events-{stage_name}" log_stream_name = "ApiEvents" assignment_table = aws_dynamodb.Table( self, "AssignmentTable", table_name=assignment_table_name, partition_key=aws_dynamodb.Attribute( name="user_id", type=aws_dynamodb.AttributeType.STRING, ), sort_key=aws_dynamodb.Attribute( name="endpoint_name", type=aws_dynamodb.AttributeType.STRING, ), read_capacity=dynamodb_read_capacity, write_capacity=dynamodb_write_capacity, removal_policy=core.RemovalPolicy.DESTROY, time_to_live_attribute="ttl", ) metrics_table = aws_dynamodb.Table( self, "MetricsTable", table_name=metrics_table_name, partition_key=aws_dynamodb.Attribute( name="endpoint_name", type=aws_dynamodb.AttributeType.STRING ), read_capacity=dynamodb_read_capacity, write_capacity=dynamodb_write_capacity, removal_policy=core.RemovalPolicy.DESTROY, ) # Create lambda layer for "aws-xray-sdk" and latest "boto3" xray_layer = aws_lambda.LayerVersion( self, "XRayLayer", code=aws_lambda.AssetCode.from_asset("layers"), compatible_runtimes=[aws_lambda.Runtime.PYTHON_3_7], description="A layer containing AWS X-Ray SDK for Python", ) # Create Lambda function to read from assignment and metrics table, log metrics # 2048MB is ~3% higher than 768 MB, it runs 2.5x faster # https://aws.amazon.com/blogs/aws/new-for-aws-lambda-functions-with-up-to-10-gb-of-memory-and-6-vcpus/ lambda_invoke = aws_lambda.Function( self, "ApiFunction", code=aws_lambda.AssetCode.from_asset("lambda/api"), handler="lambda_invoke.lambda_handler", runtime=aws_lambda.Runtime.PYTHON_3_7, timeout=core.Duration.seconds(api_lambda_timeout), memory_size=api_lambda_memory, environment={ "ASSIGNMENT_TABLE": assignment_table.table_name, "METRICS_TABLE": metrics_table.table_name, "DELIVERY_STREAM_NAME": delivery_stream_name, "DELIVERY_SYNC": "true" if delivery_sync else "false", "LOG_LEVEL": log_level, }, layers=[xray_layer], tracing=aws_lambda.Tracing.ACTIVE, ) # Grant read/write permissions to assignment and metrics tables assignment_table.grant_read_data(lambda_invoke) assignment_table.grant_write_data(lambda_invoke) metrics_table.grant_read_data(lambda_invoke) # Add sagemaker invoke lambda_invoke.add_to_role_policy( aws_iam.PolicyStatement( actions=[ "sagemaker:InvokeEndpoint", ], resources=[ "arn:aws:sagemaker:{}:{}:endpoint/{}".format( self.region, self.account, endpoint_filter ), ], ) ) 
# Create API Gateway for api lambda, which will create an output aws_apigateway.LambdaRestApi( self, "Api", rest_api_name=api_name, deploy_options=aws_apigateway.StageOptions(stage_name=stage_name), proxy=True, handler=lambda_invoke, ) # Create lambda function for processing metrics lambda_register = aws_lambda.Function( self, "RegisterFunction", code=aws_lambda.AssetCode.from_asset("lambda/api"), handler="lambda_register.lambda_handler", runtime=aws_lambda.Runtime.PYTHON_3_7, timeout=core.Duration.seconds(metrics_lambda_timeout), memory_size=metrics_lambda_memory, environment={ "METRICS_TABLE": metrics_table.table_name, "DELIVERY_STREAM_NAME": delivery_stream_name, "DELIVERY_SYNC": "true" if delivery_sync else "false", "LOG_LEVEL": log_level, }, layers=[xray_layer], tracing=aws_lambda.Tracing.ACTIVE, ) # Add write metrics metrics_table.grant_write_data(lambda_register) # Add sagemaker invoke lambda_register.add_to_role_policy( aws_iam.PolicyStatement( actions=[ "sagemaker:DescribeEndpoint", ], resources=[ "arn:aws:sagemaker:{}:{}:endpoint/{}".format( self.region, self.account, endpoint_filter ), ], ) ) # Grant permissions to the service catalog use role service_catalog_role = aws_iam.Role.from_role_arn( self, "RegisterRole", f"arn:{self.partition}:iam::{self.account}:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole", ) lambda_register.grant_invoke(service_catalog_role) # Return the register lambda function as output core.CfnOutput(self, "RegisterLambda", value=lambda_register.function_name) # Get cloudwatch put metrics policy () cloudwatch_metric_policy = aws_iam.PolicyStatement( actions=["cloudwatch:PutMetricData"], resources=["*"] ) # If we are only using sync delivery, don't require firehose or s3 buckets if delivery_sync: metrics_table.grant_write_data(lambda_invoke) lambda_invoke.add_to_role_policy(cloudwatch_metric_policy) print("# No Firehose") return # Add kinesis stream logging lambda_invoke.add_to_role_policy( aws_iam.PolicyStatement( actions=[ "firehose:PutRecord", ], resources=[ "arn:aws:firehose:{}:{}:deliverystream/{}".format( self.region, self.account, delivery_stream_name ), ], ) ) # Create s3 bucket for event logging (name must be < 63 chars) s3_logs = aws_s3.Bucket( self, "S3Logs", removal_policy=core.RemovalPolicy.DESTROY, ) firehose_role = aws_iam.Role( self, "KinesisFirehoseRole", assumed_by=aws_iam.ServicePrincipal("firehose.amazonaws.com"), ) firehose_role.add_to_policy( aws_iam.PolicyStatement( actions=[ "s3:AbortMultipartUpload", "s3:GetBucketLocation", "s3:GetObject", "s3:ListBucket", "s3:ListBucketMultipartUploads", "s3:PutObject", ], resources=[s3_logs.bucket_arn, f"{s3_logs.bucket_arn}/*"], ) ) # Create LogGroup and Stream, and add permissions to role firehose_log_group = aws_logs.LogGroup(self, "FirehoseLogGroup") firehose_log_stream = firehose_log_group.add_stream(log_stream_name) firehose_role.add_to_policy( aws_iam.PolicyStatement( actions=[ "logs:PutLogEvents", ], resources=[ f"arn:{self.partition}:logs:{self.region}:{self.account}:log-group:{firehose_log_group.log_group_name}:log-stream:{firehose_log_stream.log_stream_name}", ], ) ) # Creat the firehose delivery stream with s3 destination aws_kinesisfirehose.CfnDeliveryStream( self, "KensisLogs", delivery_stream_name=delivery_stream_name, s3_destination_configuration=aws_kinesisfirehose.CfnDeliveryStream.S3DestinationConfigurationProperty( bucket_arn=s3_logs.bucket_arn, compression_format="GZIP", role_arn=firehose_role.role_arn, prefix=f"{stage_name}/", 
cloud_watch_logging_options=aws_kinesisfirehose.CfnDeliveryStream.CloudWatchLoggingOptionsProperty(
                    enabled=True,
                    log_group_name=firehose_log_group.log_group_name,
                    log_stream_name=firehose_log_stream.log_stream_name,
                ),
                buffering_hints=aws_kinesisfirehose.CfnDeliveryStream.BufferingHintsProperty(
                    interval_in_seconds=firehose_interval,
                    size_in_m_bs=firehose_mb_size,
                ),
            ),
        )

        # Create lambda function for processing metrics
        lambda_metrics = aws_lambda.Function(
            self,
            "MetricsFunction",
            code=aws_lambda.AssetCode.from_asset("lambda/api"),
            handler="lambda_metrics.lambda_handler",
            runtime=aws_lambda.Runtime.PYTHON_3_7,
            timeout=core.Duration.seconds(metrics_lambda_timeout),
            memory_size=metrics_lambda_memory,
            environment={
                "METRICS_TABLE": metrics_table.table_name,
                "DELIVERY_STREAM_NAME": delivery_stream_name,
                "LOG_LEVEL": log_level,
            },
            layers=[xray_layer],
            tracing=aws_lambda.Tracing.ACTIVE,
        )

        # Add write metrics for dynamodb table
        metrics_table.grant_write_data(lambda_metrics)

        # Add put metrics for cloudwatch
        lambda_metrics.add_to_role_policy(cloudwatch_metric_policy)

        # Allow metrics to read from S3 and write to DynamoDB
        s3_logs.grant_read(lambda_metrics)

        # Create S3 logs notification for processing lambda
        notification = aws_s3_notifications.LambdaDestination(lambda_metrics)
        s3_logs.add_event_notification(aws_s3.EventType.OBJECT_CREATED, notification)
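# --- Illustrative sketch (not part of the stack above): how ApiFunction might
# emit an event when DELIVERY_SYNC is "false". DELIVERY_STREAM_NAME matches the
# Lambda environment defined earlier; the event payload is a placeholder.
import json
import os

import boto3

firehose_client = boto3.client("firehose")

def log_api_event(event: dict) -> None:
    # Firehose concatenates records in the S3 object, so a trailing newline
    # keeps the output line-delimited for the metrics Lambda that reads it.
    firehose_client.put_record(
        DeliveryStreamName=os.environ["DELIVERY_STREAM_NAME"],
        Record={"Data": (json.dumps(event) + "\n").encode("utf-8")},
    )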
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    # The code that defines your stack goes here
    role01 = iam.CfnRole(
        self,
        id="firehose01_role",
        assume_role_policy_document={
            "Statement": [{
                "Action": "sts:AssumeRole",
                "Effect": "Allow",
                "Principal": {
                    "Service": "lambda.amazonaws.com"
                }
            }],
            "Version": "2012-10-17"
        },
        managed_policy_arns=[
            "arn:aws:iam::aws:policy/service-role/AWSLambdaKinesisExecutionRole"
        ])

    policy01 = iam.CfnPolicy(
        self,
        id="firehose01_policy",
        policy_name="firehose01_policy",
        policy_document={
            'Version': "2012-10-17",
            'Statement': [{
                "Action": [
                    's3:AbortMultipartUpload', 's3:GetBucketLocation',
                    's3:GetObject', 's3:ListBucket',
                    's3:ListBucketMultipartUploads', 's3:PutObject'
                ],
                "Resource": ['*'],
                "Effect": "Allow"
            }]
        },
        roles=[role01.ref])

    delivery_stream = kinesisfirehose.CfnDeliveryStream(
        self,
        id="firehose01",
        delivery_stream_name="firehose01",
        # Keys in dict-style props must be camelCase to be picked up at synth.
        extended_s3_destination_configuration={
            # S3 bucket info
            'bucketArn': 'arn:aws:s3:::fluent-bit-s3',
            # Compression: the old scheme used GZIP; the new scheme is TBD
            'compressionFormat': 'GZIP',
            # Format conversion (to ORC/Parquet); disabled by default
            'dataFormatConversionConfiguration': {'enabled': False},
            # Encryption: none by default
            'encryptionConfiguration': {'noEncryptionConfig': 'NoEncryption'},
            # Error output prefix: not configured
            'bufferingHints': {
                'intervalInSeconds': 600,
                'sizeInMBs': 128
            },
            'processingConfiguration': {
                'enabled': True,
                'processors': [{
                    'type': 'Lambda',
                    'parameters': [{
                        'parameterName': 'BufferIntervalInSeconds',
                        'parameterValue': '60'
                    }, {
                        'parameterName': 'BufferSizeInMBs',
                        'parameterValue': '3'
                    }, {
                        'parameterName': 'LambdaArn',
                        'parameterValue':
                        'arn:aws:lambda:ap-southeast-1:596030579944:function:firehose-test'
                    }]
                }]
            },
            'roleArn': 'arn:aws:iam::596030579944:role/avalon_lambda_kinesis_role',
            # Source-record backup must be explicitly enabled when a backup
            # configuration is supplied.
            's3BackupMode': 'Enabled',
            's3BackupConfiguration': {
                'bucketArn': 'arn:aws:s3:::fluent-bit-s3',
                'bufferingHints': {
                    'intervalInSeconds': 600,
                    'sizeInMBs': 128
                },
                'compressionFormat': 'GZIP',
                'encryptionConfiguration': {'noEncryptionConfig': 'NoEncryption'},
                'prefix': '/backup',
                'roleArn': 'arn:aws:iam::596030579944:role/avalon_lambda_kinesis_role'
            }
        },
    )
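# --- Illustrative sketch (not part of the stack above): the transformation
# contract for the Lambda named by LambdaArn in the processing configuration.
# Firehose invokes it with a batch of base64-encoded records and expects each
# record back with the same recordId, a result of Ok / Dropped /
# ProcessingFailed, and base64-encoded data. The upper-casing is a placeholder.
import base64

def lambda_handler(event, context):
    output = []
    for record in event["records"]:
        payload = base64.b64decode(record["data"])
        transformed = payload.upper()  # placeholder transformation
        output.append({
            "recordId": record["recordId"],
            "result": "Ok",
            "data": base64.b64encode(transformed).decode("ascii"),
        })
    return {"records": output}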
def __init__(self, scope: core.Construct, construct_id: str, **kwargs) -> None: super().__init__(scope, construct_id, **kwargs) # create db for glue schema glue_db = glue.Database( self, 'GlueDB', database_name='reddit_data', ) # data schema glue_table = glue.Table( self, 'GlueTable', table_name='sentiment', columns=[ glue.Column(name='@timestamp', type=glue.Schema.TIMESTAMP), glue.Column(name='id', type=glue.Schema.STRING), glue.Column(name='subreddit', type=glue.Schema.STRING), glue.Column(name='body', type=glue.Schema.STRING), glue.Column(name='is_submitter', type=glue.Schema.BOOLEAN), glue.Column(name='polarity', type=glue.Schema.FLOAT), glue.Column(name='subjectivity', type=glue.Schema.FLOAT), glue.Column(name='author', type=glue.Schema.STRING), ], database=glue_db, data_format=glue.DataFormat.PARQUET, bucket=s3.Bucket.from_bucket_arn(self, 'DataBucket', BUCKET_ARN), s3_prefix='reddit/', ) # role assumed by firehose stream_role = iam.Role( self, 'FirehoseRole', assumed_by=iam.ServicePrincipal('firehose.amazonaws.com'), description='role used by Firehose to access s3 bucket', ) # add s3 statement stream_role.add_to_policy( iam.PolicyStatement( resources=[BUCKET_ARN, f'{BUCKET_ARN}/*'], actions=[ 's3:AbortMultipartUpload', 's3:GetBucketLocation', 's3:GetObject', 's3:ListBucket', 's3:ListBucketMultipartUploads', 's3:PutObject', ], )) # add glue statement stream_role.add_to_policy( iam.PolicyStatement( resources=[ glue_table.table_arn, glue_db.database_arn, glue_db.catalog_arn, ], actions=[ 'glue:GetTable', 'glue:GetTableVersion', 'glue:GetTableVersions', ], )) # cloudwatch statement stream_role.add_to_policy( iam.PolicyStatement( resources=['*'], actions=[ 'logs:PutLogEvents', ], )) data_format_conversion_configuration = kf.CfnDeliveryStream.DataFormatConversionConfigurationProperty( enabled=True, input_format_configuration=kf.CfnDeliveryStream. InputFormatConfigurationProperty( deserializer=kf.CfnDeliveryStream.DeserializerProperty( hive_json_ser_de=kf.CfnDeliveryStream. HiveJsonSerDeProperty(), ), ), output_format_configuration=kf.CfnDeliveryStream. OutputFormatConfigurationProperty( serializer=kf.CfnDeliveryStream.SerializerProperty( parquet_ser_de=kf.CfnDeliveryStream.ParquetSerDeProperty(), ), ), schema_configuration=kf.CfnDeliveryStream. 
SchemaConfigurationProperty( database_name=glue_db.database_name, table_name=glue_table.table_name, role_arn=stream_role.role_arn, region='us-east-2', ), ) s3_config = kf.CfnDeliveryStream.ExtendedS3DestinationConfigurationProperty( bucket_arn=BUCKET_ARN, # temporary, will replace with env variable role_arn=stream_role.role_arn, data_format_conversion_configuration= data_format_conversion_configuration, prefix='reddit/', buffering_hints=kf.CfnDeliveryStream.BufferingHintsProperty( size_in_m_bs=64, ), ) firehose = kf.CfnDeliveryStream( self, 'FirehoseStream', delivery_stream_name='RedditDataStream', extended_s3_destination_configuration=s3_config, ) # add role dependency firehose.node.add_dependency(stream_role) # add ECS Fargate instance app_role = iam.Role( self, 'RedditStreamingAppRole', assumed_by=iam.ServicePrincipal('ecs-tasks.amazonaws.com'), description= 'Role used by the Reddit Streaming Application Fargate Task', ) # add firehose permissions app_role.add_to_policy( iam.PolicyStatement( resources=[firehose.attr_arn], actions=[ 'firehose:DeleteDeliveryStream', 'firehose:PutRecord', 'firehose:PutRecordBatch', 'firehose:UpdateDestination', ], )) # add ecs and cloudwatch permissions app_role.add_to_policy( iam.PolicyStatement( resources=['*'], actions=[ 'ecr:GetAuthorizationToken', 'ecr:BatchCheckLayerAvailability', 'ecr:GetDownloadUrlForLayer', 'ecr:BatchGetImage', 'logs:CreateLogStream', 'logs:PutLogEvents', ], )) vpc = ec2.Vpc(self, 'RedditVpc', max_azs=3) cluster = ecs.Cluster(self, 'RedditCluster', vpc=vpc) task_definition = ecs.FargateTaskDefinition( self, 'TaskDefinition', memory_limit_mib=512, cpu=256, task_role=app_role, ) task_definition.add_container( id='RedditStreamingApp', image=ecs.ContainerImage.from_asset('./sentiment_analysis'), command=['all'], environment={ 'FIREHOSE_STREAM_NAME': firehose.delivery_stream_name, 'PRAW_CLIENT_SECRET': os.environ['PRAW_CLIENT_SECRET'], 'PRAW_CLIENT_ID': os.environ['PRAW_CLIENT_ID'], 'PRAW_USER_AGENT': os.environ['PRAW_USER_AGENT'], }, logging=ecs.LogDriver.aws_logs(stream_prefix='reddit'), ) container = ecs.FargateService( self, 'StreamingApplication', desired_count=1, task_definition=task_definition, cluster=cluster, assign_public_ip=True, )
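# --- Illustrative sketch (not part of the stack above): one record as the
# streaming application might put it onto RedditDataStream. The keys mirror
# the Glue columns declared above so the Hive JSON deserializer can map them
# before Parquet conversion; the values are made up.
import datetime
import json

import boto3

def put_sample_comment() -> None:
    firehose_client = boto3.client("firehose")
    record = {
        "@timestamp": datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
        "id": "t1_example",
        "subreddit": "aws",
        "body": "CDK makes Firehose setups repeatable.",
        "is_submitter": False,
        "polarity": 0.42,
        "subjectivity": 0.5,
        "author": "example_user",
    }
    firehose_client.put_record(
        DeliveryStreamName="RedditDataStream",
        Record={"Data": (json.dumps(record) + "\n").encode("utf-8")},
    )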
def __init__(self, scope: core.Construct, id: str, props: KinesisFirehoseStackProps, **kwargs) -> None: super().__init__(scope, id, **kwargs) lambda_repository = aws_codecommit.Repository( self, "ClicksProcessingLambdaRepository", repository_name="MythicalMysfits-ClicksProcessingLambdaRepository", ) core.CfnOutput( self, "kinesisRepositoryCloneUrlHttp", value=lambda_repository.repository_clone_url_http, description="Clicks Processing Lambda Repository Clone URL HTTP", ) core.CfnOutput( self, "kinesisRepositoryCloneUrlSsh", value=lambda_repository.repository_clone_url_ssh, description="Clicks Processing Lambda Repository Clone URL SSH", ) clicks_destination_bucket = aws_s3.Bucket(self, "Bucket", versioned=True) lambda_function_policy = aws_iam.PolicyStatement() lambda_function_policy.add_actions("dynamodb:GetItem") lambda_function_policy.add_resources(props.table.table_arn) mysfits_clicks_processor = aws_lambda.Function( self, "Function", handler="streamProcessor.processRecord", runtime=aws_lambda.Runtime.PYTHON_3_7, description= "An Amazon Kinesis Firehose stream processor that enriches click records to not just include a mysfitId, but also other attributes that can be analyzed later.", memory_size=128, code=aws_lambda.Code.asset("../../lambda-streaming-processor"), timeout=core.Duration.seconds(30), initial_policy=[lambda_function_policy], environment={ # TODO: this seems better than having the user copy/paste it in, but is it the best way? "MYSFITS_API_URL": "https://{}.execute-api.{}.amazonaws.com/prod/".format( props.api_gateway.ref, core.Aws.REGION) }, ) firehose_delivery_role = aws_iam.Role( self, "FirehoseDeliveryRole", role_name="FirehoseDeliveryRole", assumed_by=aws_iam.ServicePrincipal("firehose.amazonaws.com"), external_id=core.Aws.ACCOUNT_ID, ) firehose_delivery_policy_s3_statement = aws_iam.PolicyStatement() firehose_delivery_policy_s3_statement.add_actions( "s3:AbortMultipartUpload", "s3:GetBucketLocation", "s3:GetObject", "s3:ListBucket", "s3:ListBucketMultipartUploads", "s3:PutObject", ) firehose_delivery_policy_s3_statement.add_resources( clicks_destination_bucket.bucket_arn) firehose_delivery_policy_s3_statement.add_resources( clicks_destination_bucket.arn_for_objects("*")) firehose_delivery_policy_lambda_statement = aws_iam.PolicyStatement() firehose_delivery_policy_lambda_statement.add_actions( "lambda:InvokeFunction") firehose_delivery_policy_lambda_statement.add_resources( mysfits_clicks_processor.function_arn) firehose_delivery_role.add_to_policy( firehose_delivery_policy_s3_statement) firehose_delivery_role.add_to_policy( firehose_delivery_policy_lambda_statement) mysfits_firehose_to_s3 = aws_kinesisfirehose.CfnDeliveryStream( self, "DeliveryStream", extended_s3_destination_configuration=aws_kinesisfirehose. CfnDeliveryStream.ExtendedS3DestinationConfigurationProperty( bucket_arn=clicks_destination_bucket.bucket_arn, buffering_hints=aws_kinesisfirehose.CfnDeliveryStream. BufferingHintsProperty(interval_in_seconds=60, size_in_m_bs=50), compression_format="UNCOMPRESSED", prefix="firehose/", role_arn=firehose_delivery_role.role_arn, processing_configuration=aws_kinesisfirehose.CfnDeliveryStream. ProcessingConfigurationProperty( enabled=True, processors=[ aws_kinesisfirehose.CfnDeliveryStream. ProcessorProperty( parameters=[ aws_kinesisfirehose.CfnDeliveryStream. ProcessorParameterProperty( parameter_name="LambdaArn", parameter_value=mysfits_clicks_processor. 
function_arn, ) ], type="Lambda", ) ], ), ), ) aws_lambda.CfnPermission( self, "Permission", action="lambda:InvokeFunction", function_name=mysfits_clicks_processor.function_arn, principal="firehose.amazonaws.com", source_account=core.Aws.ACCOUNT_ID, source_arn=mysfits_firehose_to_s3.attr_arn, ) click_processing_api_role = aws_iam.Role( self, "ClickProcessingApiRole", assumed_by=aws_iam.ServicePrincipal("apigateway.amazonaws.com"), ) api_policy = aws_iam.PolicyStatement() api_policy.add_actions("firehose:PutRecord") api_policy.add_resources(mysfits_firehose_to_s3.attr_arn) aws_iam.Policy( self, "ClickProcessingApiPolicy", policy_name="api_gateway_firehose_proxy_role", statements=[api_policy], roles=[click_processing_api_role], ) api = aws_apigateway.RestApi( self, "APIEndpoint", rest_api_name="ClickProcessing API Service", endpoint_types=[aws_apigateway.EndpointType.REGIONAL], ) clicks = api.root.add_resource("clicks") clicks.add_method( "PUT", aws_apigateway.AwsIntegration( service="firehose", integration_http_method="POST", action="PutRecord", options=aws_apigateway.IntegrationOptions( connection_type=aws_apigateway.ConnectionType.INTERNET, credentials_role=click_processing_api_role, integration_responses=[ aws_apigateway.IntegrationResponse( status_code="200", response_templates={ "application/json": '{"status": "OK"}' }, response_parameters={ "method.response.header.Access-Control-Allow-Headers": "'Content-Type'", "method.response.header.Access-Control-Allow-Methods": "'OPTIONS,PUT'", "method.response.header.Access-Control-Allow-Origin": "'*'", }, ) ], request_parameters={ "integration.request.header.Content-Type": "'application/x-amz-json-1.1'" }, request_templates={ "application/json": """{ "DeliveryStreamName": "%s", "Record": { "Data": "$util.base64Encode($input.json('$'))" }}""" % mysfits_firehose_to_s3.ref }, ), ), method_responses=[ aws_apigateway.MethodResponse( status_code="200", response_parameters={ "method.response.header.Access-Control-Allow-Headers": True, "method.response.header.Access-Control-Allow-Methods": True, "method.response.header.Access-Control-Allow-Origin": True, }, ) ], ) clicks.add_method( "OPTIONS", aws_apigateway.MockIntegration( integration_responses=[ aws_apigateway.IntegrationResponse( status_code="200", response_parameters={ "method.response.header.Access-Control-Allow-Headers": "'Content-Type,X-Amz-Date,Authorization,X-Api-Key,X-Amz-Security-Token,X-Amz-User-Agent'", "method.response.header.Access-Control-Allow-Origin": "'*'", "method.response.header.Access-Control-Allow-Credentials": "'false'", "method.response.header.Access-Control-Allow-Methods": "'OPTIONS,GET,PUT,POST,DELETE'", }, ) ], passthrough_behavior=aws_apigateway.PassthroughBehavior.NEVER, request_templates={"application/json": '{"statusCode": 200}'}, ), method_responses=[ aws_apigateway.MethodResponse( status_code="200", response_parameters={ "method.response.header.Access-Control-Allow-Headers": True, "method.response.header.Access-Control-Allow-Methods": True, "method.response.header.Access-Control-Allow-Credentials": True, "method.response.header.Access-Control-Allow-Origin": True, }, ) ], )
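# --- Illustrative sketch (not part of the stack above): exercising the click
# API. API Gateway maps the PUT body through the request template into a
# firehose:PutRecord call, so any JSON document works. The endpoint URL is a
# placeholder for the deployed stage URL.
import requests  # third-party client, assumed available

API_URL = "https://abc123.execute-api.us-east-1.amazonaws.com/prod"  # hypothetical

def send_click(mysfit_id: str) -> int:
    response = requests.put(API_URL + "/clicks", json={"mysfitId": mysfit_id})
    return response.status_code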
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    ###########################################################################
    # AWS SECRETS MANAGER - Templated secret
    ###########################################################################
    # templated_secret = aws_secretsmanager.Secret(self, "TemplatedSecret",
    #     generate_secret_string=aws_secretsmanager.SecretStringGenerator(
    #         secret_string_template= "{\"username\":\"cleanbox\"}",
    #         generate_string_key="password"
    #     )
    # )

    ###########################################################################
    # CUSTOM CLOUDFORMATION RESOURCE
    ###########################################################################
    # customlambda = aws_lambda.Function(self,'customconfig',
    #     handler='customconfig.on_event',
    #     runtime=aws_lambda.Runtime.PYTHON_3_7,
    #     code=aws_lambda.Code.asset('customconfig'),
    # )
    # customlambda_statement = aws_iam.PolicyStatement(actions=["events:PutRule"], conditions=None, effect=None, not_actions=None, not_principals=None, not_resources=None, principals=None, resources=["*"], sid=None)
    # customlambda.add_to_role_policy(statement=customlambda_statement)
    # my_provider = cr.Provider(self, "MyProvider",
    #     on_event_handler=customlambda,
    #     # is_complete_handler=is_complete,  # optional async "waiter"
    #     log_retention=logs.RetentionDays.SIX_MONTHS
    # )
    # CustomResource(self, 'customconfigresource', service_token=my_provider.service_token)

    ###########################################################################
    # AWS LAMBDA FUNCTIONS
    ###########################################################################
    sqs_to_elastic_cloud = aws_lambda.Function(
        self,
        'sqs_to_elastic_cloud',
        handler='sqs_to_elastic_cloud.lambda_handler',
        runtime=aws_lambda.Runtime.PYTHON_3_7,
        code=aws_lambda.Code.asset('sqs_to_elastic_cloud'),
        memory_size=4096,
        timeout=core.Duration.seconds(300),
        log_retention=logs.RetentionDays.ONE_DAY)

    sqs_to_elasticsearch_service = aws_lambda.Function(
        self,
        'sqs_to_elasticsearch_service',
        handler='sqs_to_elasticsearch_service.lambda_handler',
        runtime=aws_lambda.Runtime.PYTHON_3_7,
        code=aws_lambda.Code.asset('sqs_to_elasticsearch_service'),
        memory_size=4096,
        timeout=core.Duration.seconds(300),
        log_retention=logs.RetentionDays.ONE_DAY)

    # sqs_to_elasticsearch_service.add_environment("kinesis_firehose_name", "-")
    # sqs_to_elastic_cloud.add_environment("index_name", "-")

    ###########################################################################
    # AWS LAMBDA PERMISSIONS
    ###########################################################################
    # sqs_to_elasticsearch_service_permission = aws_lambda.Permission(*, principal, action=None, event_source_token=None, scope=None, source_account=None, source_arn=None)

    ###########################################################################
    # AMAZON S3 BUCKETS
    ###########################################################################
    access_log_bucket = aws_s3.Bucket(self, "access_log_bucket")
    kinesis_log_bucket = aws_s3.Bucket(self, "kinesis_log_bucket")

    ###########################################################################
    # LAMBDA SUPPLEMENTAL POLICIES
    ###########################################################################
    lambda_supplemental_policy_statement = aws_iam.PolicyStatement(
        effect=aws_iam.Effect.ALLOW,
        actions=["s3:Get*", "s3:Head*", "s3:List*", "firehose:*"],
        resources=["*"])

    sqs_to_elastic_cloud.add_to_role_policy(
        lambda_supplemental_policy_statement)
    sqs_to_elasticsearch_service.add_to_role_policy(
        lambda_supplemental_policy_statement)

    ###########################################################################
    # AWS SNS TOPICS
    ###########################################################################
    access_log_topic = aws_sns.Topic(self, "access_log_topic")

    ###########################################################################
    # ADD AMAZON S3 BUCKET NOTIFICATIONS
    ###########################################################################
    access_log_bucket.add_event_notification(
        aws_s3.EventType.OBJECT_CREATED,
        aws_s3_notifications.SnsDestination(access_log_topic))

    ###########################################################################
    # AWS SQS QUEUES
    ###########################################################################
    sqs_to_elasticsearch_service_queue_iqueue = aws_sqs.Queue(
        self, "sqs_to_elasticsearch_service_queue_dlq")
    sqs_to_elasticsearch_service_queue_dlq = aws_sqs.DeadLetterQueue(
        max_receive_count=10,
        queue=sqs_to_elasticsearch_service_queue_iqueue)
    sqs_to_elasticsearch_service_queue = aws_sqs.Queue(
        self,
        "sqs_to_elasticsearch_service_queue",
        visibility_timeout=core.Duration.seconds(301),
        dead_letter_queue=sqs_to_elasticsearch_service_queue_dlq)

    sqs_to_elastic_cloud_queue_iqueue = aws_sqs.Queue(
        self, "sqs_to_elastic_cloud_queue_dlq")
    sqs_to_elastic_cloud_queue_dlq = aws_sqs.DeadLetterQueue(
        max_receive_count=10, queue=sqs_to_elastic_cloud_queue_iqueue)
    sqs_to_elastic_cloud_queue = aws_sqs.Queue(
        self,
        "sqs_to_elastic_cloud_queue",
        visibility_timeout=core.Duration.seconds(301),
        dead_letter_queue=sqs_to_elastic_cloud_queue_dlq)

    ###########################################################################
    # AWS SNS TOPIC SUBSCRIPTIONS
    ###########################################################################
    access_log_topic.add_subscription(
        aws_sns_subscriptions.SqsSubscription(sqs_to_elastic_cloud_queue))
    access_log_topic.add_subscription(
        aws_sns_subscriptions.SqsSubscription(
            sqs_to_elasticsearch_service_queue))

    ###########################################################################
    # AWS LAMBDA SQS EVENT SOURCE
    ###########################################################################
    sqs_to_elastic_cloud.add_event_source(
        SqsEventSource(sqs_to_elastic_cloud_queue, batch_size=10))
    sqs_to_elasticsearch_service.add_event_source(
        SqsEventSource(sqs_to_elasticsearch_service_queue, batch_size=10))

    ###########################################################################
    # AWS ELASTICSEARCH DOMAIN ACCESS POLICY
    ###########################################################################
    this_aws_account = aws_iam.AccountPrincipal(account_id="012345678912")
    # s3_to_elasticsearch_access_logs_domain_access_policy_statement = aws_iam.PolicyStatement(
    #     principals=[this_aws_account],
    #     effect=aws_iam.Effect.ALLOW,
    #     actions=["es:*"],
    #     resources=["*"]
    # )
    # s3_to_elasticsearch_access_logs_domain_access_policy_statement_list=[]
    # s3_to_elasticsearch_access_logs_domain_access_policy_statement_list.append(s3_to_elasticsearch_access_logs_domain_access_policy_statement)

    ###########################################################################
    # AWS ELASTICSEARCH DOMAIN
    ###########################################################################
    s3_to_elasticsearch_access_logs_domain = aws_elasticsearch.Domain(
        self,
        "s3-to-elasticsearch-access-logs-domain",
        # access_policies=s3_to_elasticsearch_access_logs_domain_access_policy_statement_list,
        version=aws_elasticsearch.ElasticsearchVersion.V7_1,
        capacity={
            "master_nodes": 3,
            "data_nodes": 4
        },
        ebs={"volume_size": 100},
        zone_awareness={"availability_zone_count": 2},
        logging={
            "slow_search_log_enabled": True,
            "app_log_enabled": True,
            "slow_index_log_enabled": True
        })

    ###########################################################################
    # AMAZON COGNITO USER POOL
    ###########################################################################
    s3_to_elasticsearch_user_pool = aws_cognito.UserPool(
        self,
        "s3-to-elasticsearch-access-logs-pool",
        sign_in_aliases=aws_cognito.SignInAliases(email=True,
                                                  username=True))

    ###########################################################################
    # AMAZON KINESIS FIREHOSE STREAM
    ###########################################################################
    # kinesis_policy_statement = aws_iam.PolicyStatement(
    #     effect=aws_iam.Effect.ALLOW,
    #     # actions=["es:*", "s3:*", "kms:*", "kinesis:*", "lambda:*"],
    #     actions=["*"],
    #     resources=["*"]
    # )
    # kinesis_policy_document = aws_iam.PolicyDocument()
    # kinesis_policy_document.add_statements(kinesis_policy_statement)

    kinesis_firehose_stream_role = aws_iam.Role(
        self,
        "BaseVPCIAMLogRole",
        assumed_by=aws_iam.ServicePrincipal('firehose.amazonaws.com'),
        inline_policies={
            "AllowLogAccess":
            aws_iam.PolicyDocument(
                assign_sids=False,
                statements=[
                    # NOTE: the bare '*' makes the scoped actions below
                    # redundant; scope this down for production use.
                    aws_iam.PolicyStatement(actions=[
                        '*', 'es:*', 'logs:PutLogEvents',
                        'logs:DescribeLogGroups',
                        'logs:DescribeLogStreams'
                    ],
                                            effect=aws_iam.Effect.ALLOW,
                                            resources=['*'])
                ])
        })

    es_retry_options = aws_kinesisfirehose.CfnDeliveryStream.ElasticsearchRetryOptionsProperty(
        duration_in_seconds=300)

    s3_configuration = aws_kinesisfirehose.CfnDeliveryStream.S3DestinationConfigurationProperty(
        bucket_arn=kinesis_log_bucket.bucket_arn,
        role_arn=kinesis_firehose_stream_role.role_arn)

    es_destination_configuration = aws_kinesisfirehose.CfnDeliveryStream.ElasticsearchDestinationConfigurationProperty(
        # "BufferingHints" : ElasticsearchBufferingHints,
        # "CloudWatchLoggingOptions" : CloudWatchLoggingOptions,
        # "ClusterEndpoint" : String,
        domain_arn=s3_to_elasticsearch_access_logs_domain.domain_arn,
        index_name="s3-to-elasticsearch-accesslogs",
        index_rotation_period="OneDay",
        # "ProcessingConfiguration" : ProcessingConfiguration,
        retry_options=es_retry_options,
        role_arn=kinesis_firehose_stream_role.role_arn,
        # "S3BackupMode" : String,
        s3_configuration=s3_configuration
        # "TypeName" : String
        # "VpcConfiguration" : VpcConfiguration
    )

    kinesis_firehose_stream = aws_kinesisfirehose.CfnDeliveryStream(
        self,
        "kinesis_firehose_stream",
        elasticsearch_destination_configuration=es_destination_configuration)

    sqs_to_elasticsearch_service.add_environment(
        "FIREHOSE_NAME", kinesis_firehose_stream.ref)
    sqs_to_elasticsearch_service.add_environment(
        "QUEUEURL", sqs_to_elasticsearch_service_queue.queue_url)
    sqs_to_elasticsearch_service.add_environment("DEBUG", "False")

    sqs_to_elastic_cloud.add_environment("ELASTICCLOUD_SECRET_NAME", "-")
    sqs_to_elastic_cloud.add_environment("ELASTIC_CLOUD_ID", "-")
    sqs_to_elastic_cloud.add_environment("ELASTIC_CLOUD_PASSWORD", "-")
    sqs_to_elastic_cloud.add_environment("ELASTIC_CLOUD_USERNAME", "-")
    sqs_to_elastic_cloud.add_environment("QUEUEURL",
                                         sqs_to_elastic_cloud_queue.queue_url)
    sqs_to_elastic_cloud.add_environment("DEBUG", "False")
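###########################################################################
# Hedged sketch: the stack above wires FIREHOSE_NAME and QUEUEURL into
# sqs_to_elasticsearch_service, but the handler source lives outside this
# snippet. The following is an assumed, minimal implementation of how such
# a handler might forward SQS message bodies to the delivery stream; the
# real code may differ.
###########################################################################
import os

import boto3

firehose = boto3.client("firehose")


def lambda_handler(event, context):
    # Each SQS record body becomes one Firehose record; Firehose then
    # delivers to the Elasticsearch domain configured in the stack.
    records = event.get("Records", [])
    for record in records:
        firehose.put_record(
            DeliveryStreamName=os.environ["FIREHOSE_NAME"],
            Record={"Data": (record["body"] + "\n").encode("utf-8")},
        )
    return {"forwarded": len(records)}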
def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
    super().__init__(scope, construct_id, **kwargs)

    # OpenSearch domain names must start with a lowercase letter and may
    # contain only lowercase letters, digits, and hyphens.
    OPENSEARCH_DOMAIN_NAME = cdk.CfnParameter(
        self,
        'OpenSearchDomainName',
        type='String',
        description='Amazon OpenSearch Service domain name',
        default='opensearch-{}'.format(''.join(
            random.sample(string.ascii_lowercase, k=5))),
        allowed_pattern=r'[a-z][a-z0-9\-]+')

    OPENSEARCH_INDEX_NAME = cdk.CfnParameter(
        self,
        'SearchIndexName',
        type='String',
        description='Amazon OpenSearch Service index name')

    EC2_KEY_PAIR_NAME = cdk.CfnParameter(
        self,
        'EC2KeyPairName',
        type='String',
        description='Amazon EC2 Instance KeyPair name')

    # vpc_name = self.node.try_get_context("vpc_name")
    # vpc = aws_ec2.Vpc.from_lookup(self, "ExistingVPC",
    #     is_default=True,
    #     vpc_name=vpc_name)

    vpc = aws_ec2.Vpc(
        self,
        "EKKStackVPC",
        max_azs=3,
        gateway_endpoints={
            "S3":
            aws_ec2.GatewayVpcEndpointOptions(
                service=aws_ec2.GatewayVpcEndpointAwsService.S3)
        })

    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceClass.html
    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceSize.html#aws_cdk.aws_ec2.InstanceSize
    ec2_instance_type = aws_ec2.InstanceType.of(
        aws_ec2.InstanceClass.BURSTABLE3, aws_ec2.InstanceSize.MEDIUM)

    sg_bastion_host = aws_ec2.SecurityGroup(
        self,
        "BastionHostSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for a bastion host',
        security_group_name='bastion-host-sg')
    cdk.Tags.of(sg_bastion_host).add('Name', 'bastion-host-sg')

    #TODO: SHOULD restrict the IP range allowed SSH access
    sg_bastion_host.add_ingress_rule(peer=aws_ec2.Peer.ipv4("0.0.0.0/0"),
                                     connection=aws_ec2.Port.tcp(22),
                                     description='SSH access')

    bastion_host = aws_ec2.Instance(
        self,
        "BastionHost",
        vpc=vpc,
        instance_type=ec2_instance_type,
        machine_image=aws_ec2.MachineImage.latest_amazon_linux(),
        vpc_subnets=aws_ec2.SubnetSelection(
            subnet_type=aws_ec2.SubnetType.PUBLIC),
        security_group=sg_bastion_host,
        key_name=EC2_KEY_PAIR_NAME.value_as_string)

    sg_use_opensearch = aws_ec2.SecurityGroup(
        self,
        "OpenSearchClientSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for an OpenSearch client',
        security_group_name='use-opensearch-cluster-sg')
    cdk.Tags.of(sg_use_opensearch).add('Name', 'use-opensearch-cluster-sg')

    sg_opensearch_cluster = aws_ec2.SecurityGroup(
        self,
        "OpenSearchSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for an OpenSearch cluster',
        security_group_name='opensearch-cluster-sg')
    cdk.Tags.of(sg_opensearch_cluster).add('Name', 'opensearch-cluster-sg')

    sg_opensearch_cluster.add_ingress_rule(
        peer=sg_opensearch_cluster,
        connection=aws_ec2.Port.all_tcp(),
        description='opensearch-cluster-sg')

    sg_opensearch_cluster.add_ingress_rule(
        peer=sg_use_opensearch,
        connection=aws_ec2.Port.tcp(443),
        description='use-opensearch-cluster-sg')
    sg_opensearch_cluster.add_ingress_rule(
        peer=sg_use_opensearch,
        connection=aws_ec2.Port.tcp_range(9200, 9300),
        description='use-opensearch-cluster-sg')

    sg_opensearch_cluster.add_ingress_rule(
        peer=sg_bastion_host,
        connection=aws_ec2.Port.tcp(443),
        description='bastion-host-sg')
    sg_opensearch_cluster.add_ingress_rule(
        peer=sg_bastion_host,
        connection=aws_ec2.Port.tcp_range(9200, 9300),
        description='bastion-host-sg')

    # Master password must be at least 8 characters long and contain at least
    # one uppercase letter, one lowercase letter, one number, and one special
    # character.
    master_user_secret = aws_secretsmanager.Secret(
        self,
        "OpenSearchMasterUserSecret",
        generate_secret_string=aws_secretsmanager.SecretStringGenerator(
            secret_string_template=json.dumps({"username": "******"}),
            generate_string_key="password",
            password_length=8))

    #XXX: aws cdk elasticsearch example - https://github.com/aws/aws-cdk/issues/2873
    # You should camelCase the property names instead of PascalCase
    opensearch_domain = aws_opensearchservice.Domain(
        self,
        "OpenSearch",
        domain_name=OPENSEARCH_DOMAIN_NAME.value_as_string,
        version=aws_opensearchservice.EngineVersion.OPENSEARCH_1_0,
        #XXX: You cannot use graviton instances with non-graviton instances.
        # Use graviton instances as data nodes or use non-graviton instances as master nodes.
        capacity={
            "master_nodes": 3,
            "master_node_instance_type": "r6g.large.search",
            "data_nodes": 3,
            "data_node_instance_type": "r6g.large.search"
        },
        ebs={
            "volume_size": 10,
            "volume_type": aws_ec2.EbsDeviceVolumeType.GP2
        },
        #XXX: az_count must be equal to vpc subnets count.
        zone_awareness={"availability_zone_count": 3},
        logging={
            "slow_search_log_enabled": True,
            "app_log_enabled": True,
            "slow_index_log_enabled": True
        },
        fine_grained_access_control=aws_opensearchservice.AdvancedSecurityOptions(
            master_user_name=master_user_secret.secret_value_from_json(
                "username").to_string(),
            master_user_password=master_user_secret.secret_value_from_json(
                "password")),
        # Enforcing HTTPS is required when fine-grained access control is enabled.
        enforce_https=True,
        # Node-to-node encryption is required when fine-grained access control is enabled.
        node_to_node_encryption=True,
        # Encryption-at-rest is required when fine-grained access control is enabled.
        encryption_at_rest={"enabled": True},
        use_unsigned_basic_auth=True,
        security_groups=[sg_opensearch_cluster],
        automated_snapshot_start_hour=17,  # 2 AM (GMT+9)
        vpc=vpc,
        vpc_subnets=[
            aws_ec2.SubnetSelection(
                one_per_az=True,
                subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT)
        ],
        removal_policy=cdk.RemovalPolicy.DESTROY  # default: cdk.RemovalPolicy.RETAIN
    )
    cdk.Tags.of(opensearch_domain).add(
        'Name', OPENSEARCH_DOMAIN_NAME.value_as_string)

    S3_BUCKET_SUFFIX = ''.join(
        random.sample(string.ascii_lowercase + string.digits, k=7))
    s3_bucket = s3.Bucket(
        self,
        "s3bucket",
        #XXX: Default: cdk.RemovalPolicy.RETAIN - The bucket will be orphaned
        removal_policy=cdk.RemovalPolicy.DESTROY,
        bucket_name="opskk-stack-{region}-{suffix}".format(
            region=cdk.Aws.REGION, suffix=S3_BUCKET_SUFFIX))

    firehose_role_policy_doc = aws_iam.PolicyDocument()
    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            resources=[
                s3_bucket.bucket_arn, "{}/*".format(s3_bucket.bucket_arn)
            ],
            actions=[
                "s3:AbortMultipartUpload", "s3:GetBucketLocation",
                "s3:GetObject", "s3:ListBucket",
                "s3:ListBucketMultipartUploads", "s3:PutObject"
            ]))

    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            resources=["*"],
            actions=[
                "ec2:DescribeVpcs", "ec2:DescribeVpcAttribute",
                "ec2:DescribeSubnets", "ec2:DescribeSecurityGroups",
                "ec2:DescribeNetworkInterfaces",
                "ec2:CreateNetworkInterface",
                "ec2:CreateNetworkInterfacePermission",
                "ec2:DeleteNetworkInterface"
            ]))

    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            resources=[
                opensearch_domain.domain_arn,
                "{}/*".format(opensearch_domain.domain_arn)
            ],
            actions=[
                "es:DescribeElasticsearchDomain",
                "es:DescribeElasticsearchDomains",
                "es:DescribeElasticsearchDomainConfig", "es:ESHttpPost",
                "es:ESHttpPut"
            ]))

    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            #XXX: https://aws.amazon.com/premiumsupport/knowledge-center/kinesis-data-firehose-delivery-failure/
            resources=[
                opensearch_domain.domain_arn,
                f"{opensearch_domain.domain_arn}/_all/_settings",
                f"{opensearch_domain.domain_arn}/_cluster/stats",
                f"{opensearch_domain.domain_arn}/{OPENSEARCH_INDEX_NAME.value_as_string}*/_mapping/%FIREHOSE_POLICY_TEMPLATE_PLACEHOLDER%",
                f"{opensearch_domain.domain_arn}/_nodes",
                f"{opensearch_domain.domain_arn}/_nodes/stats",
                f"{opensearch_domain.domain_arn}/_nodes/*/stats",
                f"{opensearch_domain.domain_arn}/_stats",
                f"{opensearch_domain.domain_arn}/{OPENSEARCH_INDEX_NAME.value_as_string}*/_stats"
            ],
            actions=["es:ESHttpGet"]))

    firehose_log_group_name = f"/aws/kinesisfirehose/{OPENSEARCH_INDEX_NAME.value_as_string}"
    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            #XXX: The ARN will be formatted as follows:
            # arn:{partition}:{service}:{region}:{account}:{resource}{sep}{resource-name}
            resources=[
                self.format_arn(
                    service="logs",
                    resource="log-group",
                    resource_name="{}:log-stream:*".format(
                        firehose_log_group_name),
                    arn_format=cdk.ArnFormat.COLON_RESOURCE_NAME)
            ],
            actions=["logs:PutLogEvents"]))

    firehose_role = aws_iam.Role(
        self,
        "KinesisFirehoseServiceRole",
        role_name=f"KinesisFirehoseServiceRole-{OPENSEARCH_INDEX_NAME.value_as_string}-{cdk.Aws.REGION}",
        assumed_by=aws_iam.ServicePrincipal("firehose.amazonaws.com"),
        #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
        inline_policies={"firehose_role_policy": firehose_role_policy_doc})

    opensearch_dest_vpc_config = aws_kinesisfirehose.CfnDeliveryStream.VpcConfigurationProperty(
        role_arn=firehose_role.role_arn,
        security_group_ids=[sg_use_opensearch.security_group_id],
        subnet_ids=vpc.select_subnets(
            subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT).subnet_ids)

    opensearch_dest_config = aws_kinesisfirehose.CfnDeliveryStream.ElasticsearchDestinationConfigurationProperty(
        index_name=OPENSEARCH_INDEX_NAME.value_as_string,
        role_arn=firehose_role.role_arn,
        s3_configuration={
            "bucketArn": s3_bucket.bucket_arn,
            "bufferingHints": {
                "intervalInSeconds": 60,
                "sizeInMBs": 1
            },
            "cloudWatchLoggingOptions": {
                "enabled": True,
                "logGroupName": firehose_log_group_name,
                "logStreamName": "S3Backup"
            },
            "compressionFormat": "UNCOMPRESSED",  # [GZIP | HADOOP_SNAPPY | Snappy | UNCOMPRESSED | ZIP]
            # Kinesis Data Firehose automatically appends the "YYYY/MM/dd/HH/" UTC prefix to delivered S3 files. You can also specify
            # an extra prefix in front of the time format and add "/" to the end to have it appear as a folder in the S3 console.
            "prefix": f"{OPENSEARCH_INDEX_NAME.value_as_string}/",
            "roleArn": firehose_role.role_arn
        },
        buffering_hints={
            "intervalInSeconds": 60,
            "sizeInMBs": 1
        },
        cloud_watch_logging_options={
            "enabled": True,
            "logGroupName": firehose_log_group_name,
            "logStreamName": "ElasticsearchDelivery"
        },
        domain_arn=opensearch_domain.domain_arn,
        index_rotation_period="NoRotation",  # [NoRotation | OneDay | OneHour | OneMonth | OneWeek]
        retry_options={"durationInSeconds": 60},
        s3_backup_mode="FailedDocumentsOnly",  # [AllDocuments | FailedDocumentsOnly]
        vpc_configuration=opensearch_dest_vpc_config)

    firehose_to_ops_delivery_stream = aws_kinesisfirehose.CfnDeliveryStream(
        self,
        "KinesisFirehoseToES",
        delivery_stream_name=OPENSEARCH_INDEX_NAME.value_as_string,
        delivery_stream_type="DirectPut",
        elasticsearch_destination_configuration=opensearch_dest_config,
        tags=[{
            "key": "Name",
            "value": OPENSEARCH_INDEX_NAME.value_as_string
        }])

    cdk.CfnOutput(self,
                  'BastionHostId',
                  value=bastion_host.instance_id,
                  export_name='BastionHostId')
    cdk.CfnOutput(self,
                  'OpenSearchDomainEndpoint',
                  value=opensearch_domain.domain_endpoint,
                  export_name='OpenSearchDomainEndpoint')
    cdk.CfnOutput(self,
                  'OpenSearchDashboardsURL',
                  value=f"{opensearch_domain.domain_endpoint}/_dashboards/",
                  export_name='OpenSearchDashboardsURL')
    cdk.CfnOutput(self,
                  'MasterUserSecretId',
                  value=master_user_secret.secret_name,
                  export_name='MasterUserSecretId')
    cdk.CfnOutput(self,
                  '{}_S3DestBucket'.format(self.stack_name),
                  value=s3_bucket.bucket_name,
                  export_name='S3DestBucket')
    cdk.CfnOutput(self,
                  'FirehoseRoleArn',
                  value=firehose_role.role_arn,
                  export_name='FirehoseRoleArn')
def __init__(self, scope: core.Construct, construct_id: str,
             stack_log_level: str, src_stream, **kwargs) -> None:
    super().__init__(scope, construct_id, **kwargs)

    # Create an S3 Bucket for storing streaming data events from Firehose
    fh_data_store = _s3.Bucket(self,
                               "fhDataStore",
                               removal_policy=core.RemovalPolicy.DESTROY,
                               auto_delete_objects=False)

    firehose_delivery_stream_name = "phi_data_filter"

    # Firehose Lambda Transformer
    # Read Lambda Code
    try:
        with open(
                "sensitive_data_filter_instream/stacks/back_end/firehose_transformation_stack/lambda_src/kinesis_firehose_transformer.py",
                encoding="utf-8",
                mode="r") as f:
            fh_transformer_fn_code = f.read()
    except OSError:
        print("Unable to read Lambda Function Code")
        raise

    fh_transformer_fn = _lambda.Function(
        self,
        "fhDataTransformerFn",
        function_name="fh_data_transformer",
        description="Transform incoming data events by appending a newline character",
        runtime=_lambda.Runtime.PYTHON_3_7,
        code=_lambda.InlineCode(fh_transformer_fn_code),
        handler="index.lambda_handler",
        timeout=core.Duration.seconds(60),
        reserved_concurrent_executions=1,
        environment={
            "LOG_LEVEL": "INFO",
            "APP_ENV": "Production",
        })

    # Create a custom log group for the transformer function
    fh_transformer_fn_lg = _logs.LogGroup(
        self,
        "fhDataTransformerFnLogGroup",
        log_group_name=f"/aws/lambda/{fh_transformer_fn.function_name}",
        removal_policy=core.RemovalPolicy.DESTROY,
        retention=_logs.RetentionDays.ONE_DAY)

    fh_delivery_role = _iam.Role(
        self,
        "fhDeliveryRole",
        # role_name="FirehoseDeliveryRole",
        assumed_by=_iam.ServicePrincipal("firehose.amazonaws.com"),
        external_id=core.Aws.ACCOUNT_ID,
    )

    # Add permissions to allow Kinesis Firehose to write to S3
    roleStmt1 = _iam.PolicyStatement(
        effect=_iam.Effect.ALLOW,
        resources=[
            fh_data_store.bucket_arn, f"{fh_data_store.bucket_arn}/*"
        ],
        actions=[
            "s3:AbortMultipartUpload", "s3:GetBucketLocation",
            "s3:GetObject", "s3:ListBucket",
            "s3:ListBucketMultipartUploads", "s3:PutObject"
        ])
    # roleStmt1.add_resources(
    #     fh_data_store.arn_for_objects("*")
    # )
    roleStmt1.sid = "AllowKinesisToWriteToS3"
    fh_delivery_role.add_to_policy(roleStmt1)

    # Add permissions to allow Kinesis Firehose to write to CloudWatch Logs
    roleStmt2 = _iam.PolicyStatement(
        effect=_iam.Effect.ALLOW,
        resources=[
            f"arn:aws:logs:{core.Aws.REGION}:{core.Aws.ACCOUNT_ID}:log-group:/aws/kinesisfirehose/{firehose_delivery_stream_name}:log-stream:*"
        ],
        actions=["logs:PutLogEvents"])
    roleStmt2.sid = "AllowKinesisToWriteToCloudWatch"
    fh_delivery_role.add_to_policy(roleStmt2)

    # Add permissions to allow Kinesis Firehose to invoke Lambda for transformations
    roleStmt3 = _iam.PolicyStatement(
        effect=_iam.Effect.ALLOW,
        resources=[fh_transformer_fn.function_arn],
        actions=["lambda:InvokeFunction"])
    roleStmt3.sid = "AllowKinesisToInvokeLambda"
    fh_delivery_role.add_to_policy(roleStmt3)

    # Add permissions to allow Kinesis Firehose to read from the Kinesis Data Stream
    policy_to_allow_fh_to_read_stream = _iam.Policy(
        self,
        "allowKinesisFhToReadKinesisDataStream",
        roles=[fh_delivery_role],
        statements=[
            _iam.PolicyStatement(
                effect=_iam.Effect.ALLOW,
                resources=[src_stream.stream_arn],
                sid="AllowKinesisFhToReadKinesisDataStream",
                actions=[
                    "kinesis:DescribeStream", "kinesis:GetShardIterator",
                    "kinesis:GetRecords", "kinesis:ListShards"
                ])
        ])

    self.fh_to_s3 = _kinesis_fh.CfnDeliveryStream(
        self,
        "fhDeliveryStream",
        delivery_stream_name=firehose_delivery_stream_name,
        delivery_stream_type="KinesisStreamAsSource",
        kinesis_stream_source_configuration=_kinesis_fh.CfnDeliveryStream.
        KinesisStreamSourceConfigurationProperty(
            kinesis_stream_arn=src_stream.stream_arn,
            role_arn=fh_delivery_role.role_arn),
        extended_s3_destination_configuration=_kinesis_fh.CfnDeliveryStream.
        ExtendedS3DestinationConfigurationProperty(
            bucket_arn=fh_data_store.bucket_arn,
            buffering_hints=_kinesis_fh.CfnDeliveryStream.
            BufferingHintsProperty(interval_in_seconds=60, size_in_m_bs=1),
            compression_format="UNCOMPRESSED",
            prefix="phi-data/",
            # prefix="phi-data/date=!{timestamp:yyyy}-!{timestamp:MM}-!{timestamp:dd}/",
            role_arn=fh_delivery_role.role_arn,
            processing_configuration=_kinesis_fh.CfnDeliveryStream.
            ProcessingConfigurationProperty(
                enabled=True,
                processors=[
                    _kinesis_fh.CfnDeliveryStream.ProcessorProperty(
                        parameters=[
                            _kinesis_fh.CfnDeliveryStream.
                            ProcessorParameterProperty(
                                parameter_name="LambdaArn",
                                parameter_value=fh_transformer_fn.
                                function_arn,
                            )
                        ],
                        type="Lambda",
                    )
                ]),
        ),
    )
    self.fh_to_s3.add_depends_on(
        policy_to_allow_fh_to_read_stream.node.default_child)

    # Restrict the transformer Lambda to be invoked by Firehose only from the stack owner account
    _lambda.CfnPermission(
        self,
        "restrictLambdaInvocationToFhInOwnAccount",
        action="lambda:InvokeFunction",
        function_name=fh_transformer_fn.function_arn,
        principal="firehose.amazonaws.com",
        source_account=core.Aws.ACCOUNT_ID,
        source_arn=self.fh_to_s3.attr_arn,
    )

    ###########################################
    ################# OUTPUTS #################
    ###########################################
    output_0 = core.CfnOutput(
        self,
        "AutomationFrom",
        value=f"{GlobalArgs.SOURCE_INFO}",
        description="To know more about this automation stack, check out our github page."
    )
    output_1 = core.CfnOutput(
        self,
        "FirehoseArn",
        value=f"https://console.aws.amazon.com/firehose/home?region={core.Aws.REGION}#/details/{self.fh_to_s3.delivery_stream_name}",
        description="The Kinesis Firehose delivery stream console link.")
    output_2 = core.CfnOutput(
        self,
        "FirehoseDataStore",
        value=f"https://console.aws.amazon.com/s3/buckets/{fh_data_store.bucket_name}",
        description="The Firehose data store bucket.")
    output_3 = core.CfnOutput(
        self,
        "SensitiveDataFilter",
        value=f"https://console.aws.amazon.com/lambda/home?region={core.Aws.REGION}#/functions/{fh_transformer_fn.function_name}",
        description="Filters sensitive data from events.")
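###########################################################################
# Hedged sketch of kinesis_firehose_transformer.py. The stack above loads
# this file from lambda_src/, but its source is not shown in this snippet,
# so the following is an assumed, minimal transformer that follows the
# standard Firehose record-transformation contract (recordId / result /
# data, with data base64-encoded both ways). The real filter logic for
# sensitive (PHI) data may differ.
###########################################################################
import base64


def lambda_handler(event, context):
    output = []
    for record in event["records"]:
        payload = base64.b64decode(record["data"]).decode("utf-8")
        # Actual PHI filtering would happen here; this sketch only ensures
        # each event ends with the newline the function description mentions.
        transformed = payload.rstrip("\n") + "\n"
        output.append({
            "recordId": record["recordId"],
            "result": "Ok",  # [Ok | Dropped | ProcessingFailed]
            "data": base64.b64encode(transformed.encode("utf-8")).decode("utf-8"),
        })
    return {"records": output}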