Example 1
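Attaches an existing Kinesis stream, referenced by ARN, as an event source for an inline Python Lambda function (batch size 1, starting from LATEST).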
    def __init__(self, app: App, id: str) -> None:
        super().__init__(app, id)

        with open("lambda-handler.py", encoding="utf8") as fp:
            handler_code = fp.read()

        # Create a reference to an already existing Kinesis stream
        kinesis_stream = kinesis.Stream.from_stream_arn(
            self, 'KinesisStream',
            Arn.format(
                ArnComponents(resource='stream',
                              service='kinesis',
                              resource_name='my-stream'), self))

        lambdaFn = lambda_.Function(self,
                                    'Singleton',
                                    handler='index.main',
                                    code=lambda_.InlineCode(handler_code),
                                    runtime=lambda_.Runtime.PYTHON_3_7,
                                    timeout=Duration.seconds(300))

        # Update Lambda Permissions To Use Stream
        kinesis_stream.grant_read(lambdaFn)

        # Create New Kinesis Event Source
        kinesis_event_source = event_sources.KinesisEventSource(
            stream=kinesis_stream,
            starting_position=lambda_.StartingPosition.LATEST,
            batch_size=1)

        # Attach New Event Source To Lambda
        lambdaFn.add_event_source(kinesis_event_source)
Example 2
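Deploys a stream-processing Lambda function from a local asset, grants it read/write access to an S3 bucket, and subscribes it to a Kinesis stream with a batch size of 1000 and a 10-second batching window.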
    def create_stream_processor(self):
        # Lambda code asset for the stream processor
        stream_function_source = _lmbd.Code.asset("../stream_function")

        # Stream processor lambda function
        stream_function = _lmbd.Function(
            self,
            "stream-function",
            handler="main.handler",
            function_name=f"{self.prefix}-stream-function",
            runtime=_lmbd.Runtime.PYTHON_3_8,
            code=stream_function_source,
            timeout=core.Duration.seconds(5),
            memory_size=128,
            environment={
                "OUTPUT_BUCKET": self.s3_bucket.bucket_name,
            },
        )

        # Enable access to S3 bucket
        self.s3_bucket.grant_read_write(stream_function)

        # Connecting the processor to the incoming event stream
        self.stream.grant_read(stream_function)
        speev_event_source = _srcs.KinesisEventSource(
            stream=self.stream,
            starting_position=_lmbd.StartingPosition.LATEST,
            batch_size=1000,
            max_batching_window=core.Duration.seconds(10),
        )
        stream_function.add_event_source(speev_event_source)
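Example 3
Subscribes a .NET Core Lambda function, packaged as a zip asset, to a Kinesis stream passed into the construct, attaching managed policies for SES, DynamoDB, and X-Ray.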
    def __init__(self, scope: core.Construct, id: str, stream: ks.Stream,
                 auditTable: ddb.Table, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        self.kinesis_processor = lambda_.Function(
            self,
            'KinesisProcessor',
            handler=
            'EmailServiceLambda::FunctionHandler.HandleKinesisEventAsync',
            code=lambda_.Code.from_asset(path='../bin/EmailServiceLambda.zip'),
            timeout=core.Duration.minutes(1),
            tracing=lambda_.Tracing.ACTIVE,
            runtime=lambda_.Runtime.DOTNET_CORE_2_1,
            environment={
                'TABLE_NAME_PREFIX': auditTable.table_name,
                'EMAIL_TO': 'no-where@null'
            })

        for policy in [
                'AmazonSESFullAccess', 'AmazonDynamoDBFullAccess',
                'AWSXrayWriteOnlyAccess'
        ]:
            self.kinesis_processor.role.add_managed_policy(
                iam.ManagedPolicy.from_aws_managed_policy_name(policy))

        self.kinesis_processor.add_event_source(
            sources.KinesisEventSource(
                stream=stream,
                starting_position=lambda_.StartingPosition.LATEST,
                batch_size=10))
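Example 4
Creates an encrypted Kinesis stream with a seven-day retention period and subscribes a Python Lambda handler that writes portfolio updates to Neptune.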
    def __configure_ingestion(self) -> None:
        self.__updates_stream = k.Stream(
            self,
            'PortfolioUpdates',
            encryption=k.StreamEncryption.MANAGED,
            retention_period=core.Duration.days(7),
            shard_count=1,
            stream_name='portfolio-updates')

        self.__updates_handler = PythonLambda(
            self,
            'UpdatesHandler',
            build_prefix='artifacts/FinSurf-PortfolioMgmt-UpdatesHandler',
            handler='updates_handler.lambda_handler',
            subnet_group_name='PortfolioMgmt',
            context=self.context,
            securityGroups=[self.security_group]).function

        self.updates_handler.add_event_source(source=evt.KinesisEventSource(
            stream=self.updates_stream,
            starting_position=lambda_.StartingPosition.LATEST))

        # Configure writing to neptune
        self.updates_handler.add_environment(
            key='NEPTUNE_ENDPOINT', value=self.neptune_cluster.attr_endpoint)
Example 5
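Validates a configuration payload, creates a Kinesis delivery stream and IAM role, subscribes one or more Lambda functions to the stream, and routes IoT topic-rule actions into it.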
    def __init__(self, scope: core.Construct, id: str, *, prefix: str,
                 environment: str, configuration, **kwargs):
        """
        :param scope: Stack class, used by CDK.
        :param id: ID of the construct, used by CDK.
        :param prefix: Prefix of the construct, used for naming purposes.
        :param environment: Environment of the construct, used for naming purposes.
        :param configuration: Configuration of the construct. In this case IOT_SNS_CONFIG_SCHEMA.
        :param kwargs: Other parameters that could be used by the construct.
        """
        super().__init__(scope, id, **kwargs)
        self.prefix = prefix
        self.environment_ = environment
        self._configuration = configuration

        # Validating that the payload passed is correct
        validate_configuration(
            configuration_schema=IOT_KINESIS_FIREHOSE_CONFIG_SCHEMA,
            configuration_received=self._configuration)

        # Defining Kinesis Stream
        stream_data = deepcopy(self._configuration["stream"])
        self._kinesis_stream = base_kinesis_firehose_delivery_stream(
            self, **stream_data)

        # Defining IAM Role
        role = base_kinesis_role(self,
                                 resource_name=stream_data["stream_name"],
                                 principal_resource="iot")

        # Validating Lambda Function Runtime
        functions_data = self._configuration["lambda_handlers"]
        self._lambda_functions = list()
        for setting in functions_data:
            _lambda_function = base_lambda_function(
                self, **setting["lambda_handler"])
            self._lambda_functions.append(_lambda_function)

            # Defining Function Subscription
            event_source = event_src.KinesisEventSource(
                stream=self._kinesis_stream, **setting["event_settings"])
            _lambda_function.add_event_source(event_source)

        # Defining Topic Rule properties
        action = iot.CfnTopicRule.KinesisActionProperty(
            stream_name=self._kinesis_stream.stream_name,
            role_arn=role.role_arn)
        action_property = iot.CfnTopicRule.ActionProperty(kinesis=action)

        rule_data = self._configuration["iot_rule"]
        self._iot_rule = base_iot_rule(self,
                                       action_property=action_property,
                                       **rule_data)
Example 6
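Subscribes a single graph-builder Lambda function to two Kinesis streams (quotes and fundamentals), reading from TRIM_HORIZON and writing results to Neptune.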
  def __configure_ingestion(self)->None:
    self.__updates_handler = PythonLambda(
      self,'FinSurf-GraphBuilder',
      build_prefix='artifacts/FinSurf-GraphBuilder',
      handler='handlers.kinesis_event_handler',
      subnet_group_name='MarketGraph',
      context=self.context,
      securityGroups= [self.security_group]).function

    self.updates_handler.add_event_source(
      source=evt.KinesisEventSource(
        stream= self.quotes_stream,
        starting_position=lambda_.StartingPosition.TRIM_HORIZON))

    self.updates_handler.add_event_source(
      source=evt.KinesisEventSource(
        stream= self.fundamental_stream,
        starting_position=lambda_.StartingPosition.TRIM_HORIZON))

    # Configure writing to neptune
    self.updates_handler.add_environment(
      key='NEPTUNE_ENDPOINT', value=self.neptune_cluster.attr_endpoint)
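Example 7
Creates a Kinesis stream and an inline Lambda function that invokes a SageMaker endpoint for each record, granting the function stream-read and sagemaker:InvokeEndpoint permissions.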
    def __init__(self, app: core.App, id: str) -> None:
        super().__init__(app, id)

        with open("lambda-handler.py", encoding="utf8") as fp:
            handler_code = fp.read()

        Kstream = kinesis_.Stream(self,
                                  "KinesisSagemakerInference",
                                  encryption=None,
                                  encryption_key=None,
                                  retention_period_hours=24,
                                  shard_count=1)

        lambdaFn = lambda_.Function(
            self,
            "KinesisSMLambda",
            code=lambda_.InlineCode(handler_code),
            handler="index.lambda_handler",
            timeout=core.Duration.seconds(300),
            runtime=lambda_.Runtime.PYTHON_3_7,
            environment={
                "endpoint_name":
                endpoint_name,  # CHANGE TO YOUR ENDPOINT NAME!!
                "content_type": "text/csv",
                "input_data": input_data,
                "bucket": bucket,
                "key": key
            })

        lambdaFn.add_to_role_policy(
            aws_iam.PolicyStatement(
                actions=[
                    'sagemaker:InvokeEndpoint',
                ],
                resources=[
                    'arn:aws:sagemaker:{}:{}:endpoint/{}'.format(
                        my_region, my_acc_id, endpoint_name),
                ]))

        # Add the Kinesis stream as Lambda source
        lambdaFn.add_event_source(
            aws_lambda_event_sources.KinesisEventSource(
                Kstream, starting_position=lambda_.StartingPosition.LATEST))

        # Add stream read permissions
        Kstream.grant_read(lambdaFn.role)
Example 8
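Builds a dashboard pipeline: Kinesis input/output streams, a Firehose delivery stream into S3, a Kinesis Data Analytics SQL application that aggregates KPIs, and a Lambda consumer that writes the aggregates to DynamoDB.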
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # The code that defines your stack goes here
        table = aws_dynamodb.Table(self, "DashboardModel",
            partition_key=aws_dynamodb.Attribute(name="Pk", type=aws_dynamodb.AttributeType.STRING),
            sort_key=aws_dynamodb.Attribute(name="Sk", type=aws_dynamodb.AttributeType.STRING),
            billing_mode=aws_dynamodb.BillingMode.PAY_PER_REQUEST)
        
        kds_input_stream = aws_kinesis.Stream(self, "kds_dashboard_input_stream",
            shard_count=1, 
            stream_name="kds_dashboard_input_stream")
        
        kds_output_stream = aws_kinesis.Stream(self, "kds_dashboard_output_stream",
            shard_count=1, 
            stream_name="kds_dashboard_output_stream")

        # Creating an ingest bucket for this stack
        ingest_bucket = aws_s3.Bucket(self,'dreis_dboard_ingest_bucket')

        kfh_service_role = aws_iam.Role(self, 'KFH_Dashboard_Role',
            assumed_by=aws_iam.ServicePrincipal('firehose.amazonaws.com')
        )

        kfh_policy_stmt = aws_iam.PolicyStatement(
            actions=["*"],
            resources=["*"]
        )

        kfh_service_role.add_to_policy(kfh_policy_stmt)

        #Creating firehose for this stack
        kfh_source = aws_kinesisfirehose.CfnDeliveryStream.KinesisStreamSourceConfigurationProperty(
            kinesis_stream_arn=kds_input_stream.stream_arn,
            role_arn=kfh_service_role.role_arn
        )

        kfh_datalake = aws_kinesisfirehose.CfnDeliveryStream(self, "kfh_datalake",
            s3_destination_configuration=aws_kinesisfirehose.CfnDeliveryStream.S3DestinationConfigurationProperty(
                bucket_arn=ingest_bucket.bucket_arn,
                buffering_hints=aws_kinesisfirehose.CfnDeliveryStream.BufferingHintsProperty(
                    interval_in_seconds=60,
                    size_in_m_bs=5),
                compression_format="UNCOMPRESSED",
                role_arn=kfh_service_role.role_arn
                ),
            delivery_stream_type="KinesisStreamAsSource",
            kinesis_stream_source_configuration=kfh_source
        )

        kda_service_role = aws_iam.Role(self, 'KDA_Dashboard_Role',
            assumed_by=aws_iam.ServicePrincipal('kinesisanalytics.amazonaws.com')
        )

        kda_policy_stmt = aws_iam.PolicyStatement(
            actions=["*"],
            resources=["*"]
        )

        kda_service_role.add_to_policy(kda_policy_stmt)

        # KA doesn't like - (dash) in names
        col1 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty(
            name="state",
            sql_type="VARCHAR(2)",
            mapping="$.state"
        )

        col2 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty(
            name="event_time",
            sql_type="TIMESTAMP",
            mapping="$.event-time"
        )
        
        col3 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty(
            name="region",  
            sql_type="VARCHAR(12)",
            mapping="$.region"
        )

        col4 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty(
            name="store_id",
            sql_type="INTEGER",
            mapping="$.store-id"
        )

        col5 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty(
            name="kpi_1",
            sql_type="INTEGER",
            mapping="$.kpi-1"
        )
        
        col6 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty(
            name="kpi_2",
            sql_type="INTEGER",
            mapping="$.kpi-2"
        )

        col7 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty(
            name="kpi_3",
            sql_type="INTEGER",
            mapping="$.kpi-3"
        )

        col8 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty(
            name="kpi_4",
            sql_type="INTEGER",
            mapping="$.kpi-4"
        )

        col9 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty(
            name="kpi_5",
            sql_type="INTEGER",
            mapping="$.kpi-5"
        )

        schema = aws_kinesisanalytics.CfnApplication.InputSchemaProperty(
            record_columns=[col2, col1, col3, col4, col5, col6, col7, col8, col9],
            record_encoding="UTF-8",
            record_format=aws_kinesisanalytics.CfnApplication.RecordFormatProperty(
                record_format_type="JSON",
                mapping_parameters=aws_kinesisanalytics.CfnApplication.MappingParametersProperty(
                    json_mapping_parameters=aws_kinesisanalytics.CfnApplication.JSONMappingParametersProperty(
                        record_row_path="$"
                    )
                )
            )
        )

        kda_is = aws_kinesisanalytics.CfnApplication.KinesisStreamsInputProperty(
            resource_arn=kds_input_stream.stream_arn,
            role_arn=kda_service_role.role_arn
        )

        ip = aws_kinesisanalytics.CfnApplication.InputProperty(
            name_prefix="SOURCE_SQL_STREAM",
            input_schema=schema,
            kinesis_streams_input=kda_is
        )

        application_code = "CREATE OR REPLACE STREAM \"DESTINATION_SQL_STREAM_BY_STORE\" (\"region\" VARCHAR(10), \"state\" VARCHAR(2), \"store_id\" INTEGER, kpi_1_sum INTEGER, kpi_2_sum INTEGER, ingest_time TIMESTAMP); " + \
            "CREATE OR REPLACE STREAM \"DESTINATION_SQL_STREAM_BY_STATE\" (\"region\" VARCHAR(10), \"state\" VARCHAR(2), kpi_1_sum INTEGER, kpi_2_sum INTEGER, ingest_time TIMESTAMP); " + \
            "CREATE OR REPLACE STREAM \"DESTINATION_SQL_STREAM_BY_REGION\" (\"region\" VARCHAR(10), kpi_1_sum INTEGER, kpi_2_sum INTEGER, ingest_time TIMESTAMP); " + \
            "CREATE OR REPLACE PUMP \"STREAM_PUMP_BY_STORE\" AS INSERT INTO \"DESTINATION_SQL_STREAM_BY_STORE\" " + \
            "SELECT STREAM \"region\", \"state\", \"store_id\", SUM(\"kpi_1\") AS kpi_1_sum, SUM(\"kpi_2\") AS kpi_2_sum, FLOOR(\"SOURCE_SQL_STREAM_001\".APPROXIMATE_ARRIVAL_TIME TO MINUTE) AS ingest_time " + \
            "FROM \"SOURCE_SQL_STREAM_001\" " + \
            "GROUP BY \"region\", \"state\", \"store_id\", FLOOR(\"SOURCE_SQL_STREAM_001\".APPROXIMATE_ARRIVAL_TIME TO MINUTE), FLOOR((\"SOURCE_SQL_STREAM_001\".ROWTIME - TIMESTAMP '1970-01-01 00:00:00') SECOND / 10 TO SECOND); " + \
            "CREATE OR REPLACE PUMP \"STREAM_PUMP_BY_STATE\" AS INSERT INTO \"DESTINATION_SQL_STREAM_BY_STATE\" " + \
            "SELECT STREAM \"region\", \"state\", SUM(\"kpi_1\") AS kpi_1_sum, SUM(\"kpi_2\") AS kpi_2_sum, FLOOR(\"SOURCE_SQL_STREAM_001\".APPROXIMATE_ARRIVAL_TIME TO MINUTE) AS ingest_time " + \
            "FROM \"SOURCE_SQL_STREAM_001\" " + \
            "GROUP BY \"region\", \"state\", FLOOR(\"SOURCE_SQL_STREAM_001\".APPROXIMATE_ARRIVAL_TIME TO MINUTE), FLOOR((\"SOURCE_SQL_STREAM_001\".ROWTIME - TIMESTAMP '1970-01-01 00:00:00') SECOND / 10 TO SECOND); " + \
            "CREATE OR REPLACE PUMP \"STREAM_PUMP_BY_REGION\" AS INSERT INTO \"DESTINATION_SQL_STREAM_BY_REGION\" " + \
            "SELECT STREAM \"region\", SUM(\"kpi_1\") AS kpi_1_sum, SUM(\"kpi_2\") AS kpi_2_sum, FLOOR(\"SOURCE_SQL_STREAM_001\".APPROXIMATE_ARRIVAL_TIME TO MINUTE) AS ingest_time " + \
            "FROM \"SOURCE_SQL_STREAM_001\" " + \
            "GROUP BY \"region\", FLOOR(\"SOURCE_SQL_STREAM_001\".APPROXIMATE_ARRIVAL_TIME TO MINUTE), FLOOR((\"SOURCE_SQL_STREAM_001\".ROWTIME - TIMESTAMP '1970-01-01 00:00:00') SECOND / 10 TO SECOND);"

        kda_app = aws_kinesisanalytics.CfnApplication(self, "kda_agg",
            inputs=[ip], #kda_inputs,
            application_code=application_code, 
            application_description="Aggregating data", 
            application_name="DashboardMetricsAggregator"
        )

        kda_output_prop = aws_kinesisanalytics.CfnApplicationOutput.KinesisStreamsOutputProperty(
            resource_arn=kds_output_stream.stream_arn,
            role_arn=kda_service_role.role_arn
        )

        kda_dest_schema = aws_kinesisanalytics.CfnApplicationOutput.DestinationSchemaProperty(
            record_format_type="JSON"
        )

        kda_output_prop_by_store = aws_kinesisanalytics.CfnApplicationOutput.OutputProperty(
            destination_schema=kda_dest_schema,
            kinesis_streams_output=kda_output_prop,
            name="DESTINATION_SQL_STREAM_BY_STORE"
        )

        kda_output_prop_by_state = aws_kinesisanalytics.CfnApplicationOutput.OutputProperty(
            destination_schema=kda_dest_schema,
            kinesis_streams_output=kda_output_prop,
            name="DESTINATION_SQL_STREAM_BY_STATE"
        )

        kda_output_prop_by_region = aws_kinesisanalytics.CfnApplicationOutput.OutputProperty(
            destination_schema=kda_dest_schema,
            kinesis_streams_output=kda_output_prop,
            name="DESTINATION_SQL_STREAM_BY_REGION"
        )

        kda_app_output_prop = aws_kinesisanalytics.CfnApplicationOutput(self, "kda_agg_output_store",
            application_name="DashboardMetricsAggregator",
            output=kda_output_prop_by_store
        )

        kda_app_output_prop = aws_kinesisanalytics.CfnApplicationOutput(self, "kda_agg_output_state",
            application_name="DashboardMetricsAggregator",
            output=kda_output_prop_by_state
        )

        kda_app_output_prop = aws_kinesisanalytics.CfnApplicationOutput(self, "kda_agg_output_region",
            application_name="DashboardMetricsAggregator",
            output=kda_output_prop_by_region
        )

        lambda_agg_function = aws_lambda.Function(self, "AggDataLambda",
            runtime=aws_lambda.Runtime.PYTHON_3_7,
            handler="lambda_function.lambda_handler",
            code=aws_lambda.Code.asset("../models/dashboard/lambdas/aggregate_data_lambda"),
            timeout=Duration.minutes(5))

        lambda_agg_function.add_environment("DDB_TABLE_DASHBOARD", table.table_name)

        lambda_agg_function.add_to_role_policy(aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            actions=[
                "kinesis:*"
            ],
            resources=["*"]
        ))

        table.grant_read_write_data(lambda_agg_function)

        kes = aws_lambda_event_sources.KinesisEventSource(kds_output_stream,
            starting_position=aws_lambda.StartingPosition.TRIM_HORIZON,
            batch_size=50, 
            #max_batching_window=100
        )

        lambda_agg_function.add_event_source(kes)

        core.CfnOutput(
            self, "TableName_Dashboard",
            description="Table name for Dashboard",
            value=table.table_name
        )

        core.CfnOutput(
            self, "BucketName_Dashboard",
            description="Bucket name",
            value=ingest_bucket.bucket_arn
        )

        core.CfnOutput(
            self, "KinesisInputStream_Dashboard",
            description="Kinesis input for Dashboard",
            value=kds_input_stream.stream_name
        )

        core.CfnOutput(
            self, "KinesisOutputStream_Dashboard",
            description="Kinesis output for Dashboard",
            value=kds_output_stream.stream_name
        )
        
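Example 9
Assembles a clickstream analytics stack: the KinesisStreamsToLambda construct, a scheduled data-producer Lambda, Firehose and Glue for the S3 data lake, and a Lambda function that indexes records into Elasticsearch.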
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # Kinesis to lambda
        self.stream_lambda = kinesis_lambda.KinesisStreamsToLambda(
            self,
            'clickstream',
            lambda_function_props=_lambda.FunctionProps(
                runtime=_lambda.Runtime.PYTHON_3_7,
                handler='index.lambda_handler',
                code=_lambda.Code.inline(
                    get_code('send_data_to_firehose.py'))),
            kinesis_stream_props=kinesis.StreamProps(
                stream_name='clickstream',
                retention_period=core.Duration.days(1),
                shard_count=4),
            kinesis_event_source_props=lambda_sources.KinesisEventSourceProps(
                starting_position=_lambda.StartingPosition.TRIM_HORIZON,
                batch_size=1))

        # Lambda to produce data
        self.produce_fake_data = _lambda.Function(
            self,
            'produce_data',
            runtime=_lambda.Runtime.PYTHON_3_7,
            timeout=core.Duration.seconds(90),
            handler='index.lambda_handler',
            code=_lambda.Code.inline(get_code('produce_data.py')),
            environment={
                'STREAM_NAME': self.stream_lambda.kinesis_stream.stream_name
            })
        self.stream_lambda.kinesis_stream.grant_read_write(
            self.produce_fake_data)

        # EventBridge rule to trigger the producer function above
        self.event_rule = events.Rule(
            self,
            'scheduledRule',
            schedule=events.Schedule.expression('rate(1 minute)'))
        self.event_rule.add_target(
            targets.LambdaFunction(self.produce_fake_data))

        # S3 Bucket
        self.bucket = s3.Bucket(self,
                                'data-clicks-lake',
                                removal_policy=core.RemovalPolicy.DESTROY,
                                auto_delete_objects=True)

        # Glue
        self.glue_db_analytical = glue.Database(
            self,
            'analytic_clickstream',
            database_name='clickstream_db',
            location_uri=None,
        )

        self.glue_table_analytical = glue.Table(
            self,
            'analytical-table',
            table_name='analytical-table',
            columns=[
                glue_column('custid', 'int'),
                glue_column('trafficfrom', 'string'),
                glue_column('url', 'string'),
                glue_column('device', 'string'),
                glue_column('touchproduct', 'int'),
                glue_column('trans_timestamp', 'string')
            ],
            database=self.glue_db_analytical,
            data_format=glue.DataFormat.PARQUET,
            bucket=self.bucket,
            s3_prefix='kinesis/',
        )

        # Firehose
        iam_role_firehose_analytical = self.create_firehose_role()
        self.bucket.grant_read_write(iam_role_firehose_analytical)

        firehose_props = FirehoseProps(
            bucket=self.bucket,
            role=iam_role_firehose_analytical,
            stream=self.stream_lambda.kinesis_stream,
            glue_db=self.glue_db_analytical,
            glue_table=self.glue_table_analytical)

        self.firehose = FirehoseLib(self, 'firehose_clickstream',
                                    firehose_props)

        # Elasticsearch
        self.es_domain = ElasticsearchLib(self,
                                          'ES-clickstream-domain').es_domain

        # Lambda to send data to Elasticsearch
        self.send_data_to_elasticsearch = lambda_python.PythonFunction(
            self,
            'clickstream_to_es',
            entry='./analytics_ml_flow/lambda/lambda_with_requirements/',
            handler='handler',
            timeout=core.Duration.seconds(180),
            index='Kinesis_ES.py',
            environment={
                'ES_HOST_HTTP': self.es_domain.domain_endpoint,
                'ES_INDEX': 'clickstream',
                'ES_IND_TYPE': 'transactions',
                'ES_REGION': 'us-west-2',
            })
        self.es_domain.grant_index_read_write('clickstream',
                                              self.send_data_to_elasticsearch)
        self.es_domain.grant_read_write(self.send_data_to_elasticsearch)

        stream_source = lambda_sources.KinesisEventSource(
            self.stream_lambda.kinesis_stream,
            starting_position=_lambda.StartingPosition.TRIM_HORIZON,
            batch_size=1)

        self.stream_lambda.kinesis_stream.grant_read(
            self.send_data_to_elasticsearch)
        self.send_data_to_elasticsearch.add_event_source(stream_source)

        # Glue Crawler
        crawler_role = self.create_crawler_permissions()
        glue_props = GlueCrawlerProps(bucket=self.bucket, role=crawler_role)
        self.glue_crawler = GlueCrawlerLib(self, 'glueCrawler', glue_props)
Example 10
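Pairs a Kinesis stream with a consumer Lambda function that stores records in S3 and a generator Lambda function that produces test data, each with its own log group.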
    def __init__(self, scope: core.Construct, construct_id: str,
                 **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        #kinesis data stream
        kinesis_stream = aws_kinesis.Stream(
            self,
            "kinesisStream",
            retention_period=core.Duration.hours(24),
            shard_count=1,
            stream_name="kinesis_test_data_pipe")

        # S3 bucket to store stream data events
        kinesis_s3_bucket = aws_s3.Bucket(
            self, "kinesisS3Bucket", removal_policy=core.RemovalPolicy.DESTROY)

        #Lambda functions

        # Import function code - stream consumer
        try:
            with open("deployments/functions/stream_data_get.py",
                      mode="r") as file:
                function_body_get = file.read()
        except OSError:
            print('File cannot be read')

        # Consumer function
        stream_get_function = aws_lambda.Function(
            self,
            "consumeFunction",
            function_name="StreamConsumeFunction",
            description="Process Data Streams and store to s3",
            runtime=aws_lambda.Runtime.PYTHON_3_6,
            handler="index.lambda_handler",
            code=aws_lambda.InlineCode(function_body_get),
            timeout=core.Duration.seconds(5),
            reserved_concurrent_executions=1,
            environment={
                "LOG_LEVEL": "INFO",
                "BUCKET_NAME": f"{kinesis_s3_bucket.bucket_name}"
            })

        # Permission for the Lambda function to read the stream
        kinesis_stream.grant_read(stream_get_function)

        # S3 permission: allow the Lambda function to write objects to the bucket
        lambda_s3_permission = aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            resources=[f"{kinesis_s3_bucket.bucket_arn}/*"],
            actions=["s3:PutObject"])
        lambda_s3_permission.sid = "S3WritePermissionToLambda"
        stream_get_function.add_to_role_policy(lambda_s3_permission)

        # Log group for the consumer function
        stream_consume_logs = aws_logs.LogGroup(
            self,
            "StreamConsumeLogs",
            log_group_name=f"/aws/lambda/{stream_get_function.function_name}",
            removal_policy=core.RemovalPolicy.DESTROY,
            retention=aws_logs.RetentionDays.ONE_DAY)

        #kinesis event source
        kinesis_event_sources = aws_lambda_es.KinesisEventSource(
            stream=kinesis_stream,
            starting_position=aws_lambda.StartingPosition.LATEST,
            batch_size=1)

        # Attach the Kinesis event source to the Lambda function
        stream_get_function.add_event_source(kinesis_event_sources)

        # Stream generator Lambda function

        # Import function code - data generator
        try:
            with open("deployments/functions/stream_data_gen.py",
                      mode="r") as file:
                function_body_gen = file.read()
        except OSError:
            print('File cannot be read')

        # Stream generator function
        stream_gen_function = aws_lambda.Function(
            self,
            "GenerateFunction",
            function_name="StreamGenerateFunction",
            description="Generate Data Streams",
            runtime=aws_lambda.Runtime.PYTHON_3_6,
            handler="index.lambda_handler",
            code=aws_lambda.InlineCode(function_body_gen),
            timeout=core.Duration.seconds(60),
            reserved_concurrent_executions=1,
            environment={
                "LOG_LEVEL": "INFO",
                "STREAM_NAME": f"{kinesis_stream.stream_name}"
            })

        # Permission for the Lambda function to write to the stream
        kinesis_stream.grant_read_write(stream_gen_function)

        # Log group for the generator function
        stream_generate_logs = aws_logs.LogGroup(
            self,
            "StreamGenerateLogs",
            log_group_name=f"/aws/lambda/{stream_gen_function.function_name}",
            removal_policy=core.RemovalPolicy.DESTROY,
            retention=aws_logs.RetentionDays.ONE_DAY)
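Example 11
Same producer/consumer pattern as above: a consumer Lambda function writes Kinesis records to S3 while a producer Lambda function pushes events into the stream, with custom log groups for both.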
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # Add your stack resources below
        # Create Kinesis Data Stream
        stream_data_pipe = _kinesis.Stream(
            self,
            "streamDataPipe",
            retention_period=core.Duration.hours(24),
            shard_count=1,
            stream_name="data_pipe")

        # Create an S3 Bucket for storing streaming data events
        stream_data_store = _s3.Bucket(
            self, "streamDataLake", removal_policy=core.RemovalPolicy.DESTROY)

        # Read Lambda Code
        try:
            with open(
                    "advanced_use_cases/lambda_src/stream_record_consumer.py",
                    mode="r") as f:
                stream_consumer_fn_code = f.read()
        except OSError:
            print("Unable to read lambda function code")

        # Deploy the lambda function
        stream_consumer_fn = _lambda.Function(
            self,
            "streamConsumerFn",
            function_name="stream_consumer_fn",
            description=
            "Process streaming data events from kinesis and store in S3",
            runtime=_lambda.Runtime.PYTHON_3_7,
            handler="index.lambda_handler",
            code=_lambda.InlineCode(stream_consumer_fn_code),
            timeout=core.Duration.seconds(3),
            reserved_concurrent_executions=1,
            environment={
                "LOG_LEVEL": "INFO",
                "BUCKET_NAME": f"{stream_data_store.bucket_name}"
            })

        # Update Lambda Permissions To Use Stream
        stream_data_pipe.grant_read(stream_consumer_fn)

        # Add permissions to lambda to write to S3
        roleStmt1 = _iam.PolicyStatement(
            effect=_iam.Effect.ALLOW,
            resources=[f"{stream_data_store.bucket_arn}/*"],
            actions=["s3:PutObject"])
        roleStmt1.sid = "AllowLambdaToWriteToS3"
        stream_consumer_fn.add_to_role_policy(roleStmt1)

        # Create Custom Loggroup for Consumer
        stream_consumer_lg = _logs.LogGroup(
            self,
            "streamConsumerLogGroup",
            log_group_name=f"/aws/lambda/{stream_consumer_fn.function_name}",
            removal_policy=core.RemovalPolicy.DESTROY,
            retention=_logs.RetentionDays.ONE_DAY)

        # Create New Kinesis Event Source
        stream_data_pipe_event_source = _lambda_event_sources.KinesisEventSource(
            stream=stream_data_pipe,
            starting_position=_lambda.StartingPosition.LATEST,
            batch_size=1)

        # Attach Kinesis Event Source To Lambda
        stream_consumer_fn.add_event_source(stream_data_pipe_event_source)

        ########################################
        #######                          #######
        #######   Stream Data Producer   #######
        #######                          #######
        ########################################

        # Read Lambda Code
        try:
            with open("advanced_use_cases/lambda_src/stream_data_producer.py",
                      mode="r") as f:
                data_producer_fn_code = f.read()
        except OSError:
            print("Unable to read lambda function code")

        # Deploy the lambda function
        data_producer_fn = _lambda.Function(
            self,
            "streamDataProducerFn",
            function_name="data_producer_fn",
            description=
            "Produce streaming data events and push to Kinesis stream",
            runtime=_lambda.Runtime.PYTHON_3_7,
            handler="index.lambda_handler",
            code=_lambda.InlineCode(data_producer_fn_code),
            timeout=core.Duration.seconds(60),
            reserved_concurrent_executions=1,
            environment={
                "LOG_LEVEL": "INFO",
                "STREAM_NAME": f"{stream_data_pipe.stream_name}"
            })

        # Grant our Lambda Producer privileges to write to Kinesis Data Stream
        stream_data_pipe.grant_read_write(data_producer_fn)

        # Create Custom Loggroup for Producer
        data_producer_lg = _logs.LogGroup(
            self,
            "dataProducerLogGroup",
            log_group_name=f"/aws/lambda/{data_producer_fn.function_name}",
            removal_policy=core.RemovalPolicy.DESTROY,
            retention=_logs.RetentionDays.ONE_DAY)