def createWithdrawWorkflow(self):
    """Build the withdraw state machine.

    A worker-polled 'payInvoice' activity is the single real step; its
    outcome is routed to a success or failure Lambda handler, each ending
    in a terminal state.  Returns the constructed sfn.StateMachine.
    """
    # Failure branch: invoke the failure handler, then end in a Fail state.
    # NOTE(review): the terminal-state ids here ('tipErrorState',
    # 'tipSuccessState') look swapped with createTipWorkflow's
    # 'withdraw*' states — confirm which workflow each id belongs to
    # (renaming would replace deployed resources, so flagging only).
    payInvoiceFailed = tasks.LambdaInvoke(
        self, 'payInvoiceFailed',
        lambda_function=self.createLambda('payInvoiceFailedLambda', 'payInvoiceFailed.payInvoiceFailed'),
        timeout=cdk.Duration.seconds(300)
    ).next(sfn.Fail(self, 'tipErrorState'))
    # Success branch: invoke the success handler, then succeed.
    payInvoiceSucceeded = tasks.LambdaInvoke(
        self, 'payInvoiceSucceeded',
        lambda_function=self.createLambda('payInvoiceSucceededLambda', 'payInvoiceSucceeded.payInvoiceSucceeded'),
        timeout=cdk.Duration.seconds(300)
    ).next(sfn.Succeed(self, 'tipSuccessState'))
    # Worker-polled activity; both heartbeat and overall timeout are 24h.
    self.payInvoice = tasks.StepFunctionsInvokeActivity(
        self, 'payInvoice',
        activity=sfn.Activity(self, 'payInvoiceActivity'),
        heartbeat=cdk.Duration.seconds(86400),
        timeout=cdk.Duration.seconds(86400),
    )
    # NOTE(review): max_attempts=0 disables retries for States.Timeout,
    # which makes backoff_rate/interval inert — confirm whether this
    # retrier was meant to be a no-op (contrast createTipWorkflow's
    # max_attempts=7).
    self.payInvoice.add_retry(
        backoff_rate=2,
        errors=['States.Timeout'],
        interval=cdk.Duration.seconds(600),
        max_attempts=0
    )
    # Any error routes to the failure branch; details land in $.errorInfo.
    self.payInvoice.add_catch(
        handler=payInvoiceFailed,
        errors=['States.ALL'],
        result_path='$.errorInfo'
    )
    self.payInvoice.next(payInvoiceSucceeded)
    return sfn.StateMachine(
        self, 'withdrawWorkflow',
        definition=self.payInvoice,
        role=self.statesRole
    )
def createTipWorkflow(self):
    """Build the tip state machine.

    A worker-polled 'getTipperInvoice' activity obtains an invoice from
    the tipper; on success a notification Lambda runs, on any error the
    workflow fails.  Returns the constructed sfn.StateMachine.
    """
    # Success branch: notify the tipper, then succeed.
    # NOTE(review): the terminal-state ids here ('withdrawSuccessState',
    # 'withdrawErrorState') look swapped with createWithdrawWorkflow's
    # 'tip*' states — confirm which workflow each id belongs to
    # (renaming would replace deployed resources, so flagging only).
    notifyTipper = tasks.LambdaInvoke(
        self, 'notifyTipper',
        lambda_function=self.createLambda('notifyTipperLambda', 'tipNotifier.tipNotifier'),
        timeout=cdk.Duration.seconds(300)
    ).next(sfn.Succeed(self, 'withdrawSuccessState'))
    # Worker-polled activity: 60s heartbeat, 24h overall timeout.
    self.getTipperInvoice = tasks.StepFunctionsInvokeActivity(
        self, 'getTipperInvoice',
        activity=sfn.Activity(self, 'getTipperInvoiceActivity'),
        heartbeat=cdk.Duration.seconds(60),
        timeout=cdk.Duration.seconds(86400),
    )
    # Retry heartbeat/timeout lapses up to 7 times with 1.5x backoff.
    self.getTipperInvoice.add_retry(
        backoff_rate=1.5,
        errors=['States.Timeout'],
        interval=cdk.Duration.seconds(60),
        max_attempts=7
    )
    # Any other error terminates the workflow in a Fail state.
    self.getTipperInvoice.add_catch(
        handler=sfn.Fail(self, 'withdrawErrorState'),
        errors=['States.ALL'],
        result_path='$.errorInfo'
    )
    self.getTipperInvoice.next(notifyTipper)
    return sfn.StateMachine(
        self, 'tipWorkflow',
        definition=self.getTipperInvoice,
        role=self.statesRole
    )
def __init__(self, scope: core.Construct, id: str, *, message: sfn.TaskInput,
             subject: Optional[str] = None, topic: Optional[sns.Topic] = None,
             result_path: str = '$.PublishResult', output_path: str = '$',
             cause: Optional[str] = None, comment: Optional[str] = None,
             error: Optional[str] = None):
    """State-machine fragment that ends an execution in a Fail state,
    optionally publishing a notification to an SNS topic first.

    When ``topic`` is given the fragment is SnsPublish -> Fail; otherwise
    it collapses to just the Fail state.  ``self._start`` and ``self._end``
    hold the fragment's entry and exit states.
    """
    super().__init__(scope, id)

    fail_state = sfn.Fail(self, 'Execution Failed',
                          cause=cause, comment=comment, error=error)
    self._end = fail_state

    if topic is None:
        # Nothing to publish: the fragment is the Fail state alone.
        self._start = fail_state
    else:
        notify = sfn_tasks.SnsPublish(
            self, 'Failure Notification',
            input_path='$',
            output_path=output_path,
            result_path=result_path,
            topic=topic,
            message=message,
            subject=subject,
        )
        notify.next(fail_state)
        self._start = notify
def __init__(self, scope: core.App, id: str, **kwargs) -> None:
    """Looping state machine: start a job, wait, poll its status, and
    branch to failure/success or back to the wait state."""
    super().__init__(scope, id, **kwargs)

    # Lambda that passes its input straight through.
    passthrough_fn = _lambda.Function(
        self, 'PassThroughLambda',
        runtime=_lambda.Runtime.PYTHON_3_7,
        code=_lambda.Code.asset('lambda'),
        handler='pass_through_lambda.handler')

    # Lambda that tracks how many iterations the loop has run.
    loop_counter_fn = _lambda.Function(
        self, 'LoopCountLambda',
        runtime=_lambda.Runtime.PYTHON_3_7,
        code=_lambda.Code.asset('lambda'),
        handler='loop_count_lambda.handler')

    kickoff = sfn.Task(
        self, "Start CodeBuild Lambda",
        task=sfn_tasks.InvokeFunction(passthrough_fn))

    # Pause length is taken from the execution input.
    pause = sfn.Wait(
        self, "Wait X Seconds",
        time=sfn.WaitTime.seconds_path('$.wait_time'))

    poll_status = sfn.Task(
        self, "Get Build Status",
        task=sfn_tasks.InvokeFunction(loop_counter_fn))

    done_gate = sfn.Choice(self, "Job Complete?")
    build_failed = sfn.Fail(
        self, "Build Failed",
        cause="AWS Batch Job Failed",
        error="DescribeJob returned FAILED")
    build_succeeded = sfn.Pass(self, "Build Successs")

    # start -> wait -> poll -> (fail | succeed | wait again)
    definition = kickoff.next(pause).next(poll_status).next(
        done_gate
        .when(sfn.Condition.string_equals("$.status", "FAILED"),
              build_failed)
        .when(sfn.Condition.string_equals("$.status", "SUCCEEDED"),
              build_succeeded)
        .otherwise(pause))

    sfn.StateMachine(
        self, "StateMachine",
        definition=definition,
        timeout=core.Duration.seconds(60),
    )
def __init__(self, scope: core.Construct, id: str, *, polling_delay: int = 5,
             statemachine_timeout: int = 300, **kwargs):
    """Provision the account-configuration state machine.

    Requests configuration, records the 'configuring' state, then polls
    until the account reports READY (record completion and succeed) or an
    unexpected state (fail).  Exposed as ``self.state_machine``.
    """
    super().__init__(scope, id, **kwargs)

    state_handler = StateHandlerLambda(self, "config-state-handler").function
    config_handler = AccountConfigLambda(self, "account-config-handler").function

    # Lambda-backed steps; each unwraps the invoke result via $.Payload.
    mark_configuring = tasks.LambdaInvoke(self, "Set Configuring State",
                                          lambda_function=state_handler,
                                          output_path="$.Payload")
    mark_completed = tasks.LambdaInvoke(self, "Set Completed State",
                                        lambda_function=state_handler,
                                        output_path="$.Payload")
    request_config = tasks.LambdaInvoke(self, "Request Account Configuration",
                                        lambda_function=config_handler,
                                        output_path="$.Payload")
    poll_config = tasks.LambdaInvoke(self, "Poll Account Configuration",
                                     lambda_function=config_handler,
                                     output_path="$.Payload")

    pause = sfn.Wait(
        self, "Delay Polling",
        time=sfn.WaitTime.duration(core.Duration.seconds(polling_delay)))

    done = sfn.Succeed(self, "Config Succeeded")
    broken = sfn.Fail(self, "Config Failed",
                      cause="Bad value in Polling loop")

    # Polling loop: PENDING -> wait and poll again; READY -> record
    # completion then succeed; anything else -> fail.
    ready_gate = sfn.Choice(self, "Account Ready?")
    ready_gate.when(sfn.Condition.string_equals('$.state', "PENDING"), pause)
    ready_gate.when(sfn.Condition.string_equals('$.state', "READY"),
                    mark_completed.next(done))
    ready_gate.otherwise(broken)

    # Main chain: request, mark configuring, then enter the polling loop.
    chain = request_config.next(mark_configuring).next(pause) \
        .next(poll_config).next(ready_gate)

    self.state_machine = sfn.StateMachine(
        self, "Account-Config-StateMachine",
        definition=chain,
        timeout=core.Duration.seconds(statemachine_timeout))
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    """Pizza-ordering demo: a Step Function that rejects any order with
    pineapple, fronted by a Lambda + API Gateway that starts executions."""
    super().__init__(scope, id, **kwargs)

    # Step Function Starts Here
    # First step: inspect the order for pineapple.
    pineapple_check_fn = _lambda.Function(
        self, "pineappleCheckLambdaHandler",
        runtime=_lambda.Runtime.NODEJS_12_X,
        handler="orderPizza.handler",
        code=_lambda.Code.from_asset("lambdas"),
    )

    check_order = step_fn.Task(
        self, 'Order Pizza Job',
        task=step_fn_tasks.InvokeFunction(pineapple_check_fn),
        input_path='$.flavour',
        result_path='$.pineappleAnalysis',
    )

    # Terminal failure when pineapple was requested.
    pineapple_rejected = step_fn.Fail(
        self, 'Sorry, We Dont add Pineapple',
        cause='Failed To Make Pizza',
        error='They asked for Pineapple',
    )

    # No pineapple: carry on and cook.
    bake = step_fn.Pass(self, 'Lets make your pizza')

    pineapple_gate = step_fn.Choice(self, 'With Pineapple?')
    pineapple_gate.when(
        step_fn.Condition.boolean_equals(
            '$.pineappleAnalysis.containsPineapple', True),
        pineapple_rejected)
    pineapple_gate.otherwise(bake)

    definition = step_fn.Chain.start(check_order).next(pineapple_gate)

    state_machine = step_fn.StateMachine(
        self, 'StateMachine',
        definition=definition,
        timeout=core.Duration.minutes(5))

    # Dead Letter Queue Setup
    dlq = sqs.Queue(self, 'stateMachineLambdaDLQ',
                    visibility_timeout=core.Duration.seconds(300))

    # Lambda behind API Gateway that kicks off executions.
    starter_fn = _lambda.Function(
        self, "stateMachineLambdaHandler",
        runtime=_lambda.Runtime.NODEJS_12_X,
        handler="stateMachineLambda.handler",
        code=_lambda.Code.from_asset("lambdas"),
        environment={
            'statemachine_arn': state_machine.state_machine_arn
        },
    )
    state_machine.grant_start_execution(starter_fn)

    # REST endpoint that proxies every request to the starter Lambda.
    api_gw.LambdaRestApi(self, 'Endpoint', handler=starter_fn)
def __init__(self, app: cdk.App, id: str, **kwargs) -> None:
    """Activity-based job poller: submit a job, wait, poll its status, and
    branch on the reported result."""
    super().__init__(app, id, **kwargs)

    submit_activity = sfn.Activity(self, "SubmitJob")
    check_activity = sfn.Activity(self, "CheckJob")

    # Submit the job; the worker's result is stored under $.guid.
    submit = sfn.Task(
        self, "Submit Job",
        task=sfn_tasks.InvokeActivity(submit_activity),
        result_path="$.guid",
    )

    # Pause length is taken from the execution input.
    pause = sfn.Wait(
        self, "Wait X Seconds",
        duration=sfn.WaitDuration.seconds_path('$.wait_time'),
    )

    poll = sfn.Task(
        self, "Get Job Status",
        task=sfn_tasks.InvokeActivity(check_activity),
        input_path="$.guid",
        result_path="$.status",
    )

    failed = sfn.Fail(
        self, "Job Failed",
        cause="AWS Batch Job Failed",
        error="DescribeJob returned FAILED",
    )

    fetch_final = sfn.Task(
        self, "Get Final Job Status",
        task=sfn_tasks.InvokeActivity(check_activity),
        input_path="$.guid",
    )

    # FAILED -> Fail; SUCCEEDED -> fetch the final status; else wait again.
    verdict = sfn.Choice(self, "Job Complete?")
    verdict.when(sfn.Condition.string_equals("$.status", "FAILED"), failed)
    verdict.when(sfn.Condition.string_equals("$.status", "SUCCEEDED"),
                 fetch_final)
    verdict.otherwise(pause)

    definition = submit.next(pause).next(poll).next(verdict)

    sfn.StateMachine(
        self, "StateMachine",
        definition=definition,
        timeout_sec=30,
    )
def __init__(self, scope: core.Construct, id: builtins.str, action_name: str,
             resources: FsiSharedResources, function: lambda_.Function) -> None:
    """Wrap ``function`` in a self-looping state machine: invoke it, then
    re-invoke while it reports MORE_AVAILABLE, finishing on COMPLETE and
    failing on any unrecognised run state."""
    super().__init__(scope, id)
    self.__resources = resources
    state_machine_name = id

    # Core task: call the Lambda with the previous Payload as input and
    # capture the response under $.Result.
    invoke_step = sft.LambdaInvoke(
        self, 'InvokeFunction',
        lambda_function=function,
        invocation_type=sft.LambdaInvocationType.REQUEST_RESPONSE,
        input_path='$.Payload',
        result_path='$.Result')

    # Branch on the run state the Lambda reported.
    run_state_gate = sf.Choice(self, 'IsComplete',
                               comment='Check if theres more to process')
    run_state_gate.when(
        sf.Condition.string_equals('$.Result.Payload.Result.RunState',
                                   'RunStatus.MORE_AVAILABLE'),
        invoke_step)
    run_state_gate.when(
        sf.Condition.string_equals('$.Result.Payload.Result.RunState',
                                   'RunStatus.COMPLETE'),
        sf.Pass(self, 'Finalize', comment='Workflow Complete'))
    run_state_gate.otherwise(
        sf.Fail(self, 'NotImplemented',
                cause='Unknown Choice',
                error='NotImplementedException'))

    definition = invoke_step.next(run_state_gate)

    # Register the definition as a standard state machine with tracing and
    # a two-week log group.
    zone_name = self.resources.landing_zone.zone_name
    self.state_machine = sf.StateMachine(
        self, 'StateMachine',
        state_machine_name=state_machine_name,
        state_machine_type=sf.StateMachineType.STANDARD,
        timeout=core.Duration.hours(2),
        logs=sf.LogOptions(destination=logs.LogGroup(
            self, 'LogGroup',
            removal_policy=core.RemovalPolicy.DESTROY,
            retention=RetentionDays.TWO_WEEKS,
            log_group_name='/homenet/fsi-{}/states/{}/{}'.format(
                zone_name, self.component_name, action_name).lower())),
        tracing_enabled=True,
        definition=definition)
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    """Human-approval workflow.

    A submit Lambda emails an approver (SNS + SES) a link to an API
    Gateway endpoint; the Step Functions task waits on its task token
    until the approval Lambda reports back, then a Choice routes to
    Succeed or Fail.
    """
    super().__init__(scope, id, **kwargs)
    # REST API exposing the approval callback endpoint (GET /v1/approve).
    api = apigateway.RestApi(
        scope=self,
        id=f'{constants.PREFIX}-approval-api',
        rest_api_name='Human approval endpoint',
        description='HTTP Endpoint backed by API Gateway and Lambda',
        endpoint_types=[apigateway.EndpointType.REGIONAL],
    )
    v1 = api.root.add_resource("v1")
    approve_api = v1.add_resource("approve")
    #################################################
    # Topic that notifies the approver by email.
    email_topic = sns.Topic(
        scope=self,
        id=f'{constants.PREFIX}-email-topic',
    )
    email_topic.add_subscription(
        subscription=subscriptions.EmailSubscription(
            email_address=constants.EMAIL_APPROVER,
        ))
    #################################################
    # Lambda that sends the approval request, carrying the task token.
    submit_job_lambda = _lambda.Function(
        scope=self,
        id=f'{constants.PREFIX}-submit-lambda',
        runtime=_lambda.Runtime.PYTHON_3_8,
        handler='submit.handler',
        environment={
            "TOPIC_ARN": email_topic.topic_arn,
            "END_POINT": approve_api.url,
            "TO_ADDRESS": constants.EMAIL_RECIPIENT,
            "FROM_ADDRESS": constants.EMAIL_SENDER,
        },
        code=_lambda.Code.from_asset(
            os.path.join('lambdas', 'submit-lambda')),
    )
    email_topic.grant_publish(submit_job_lambda)
    # NOTE(review): 'ses:Send*' on resource '*' is broad — consider scoping
    # to the verified identities actually used.
    submit_job_lambda.add_to_role_policy(statement=iam.PolicyStatement(
        actions=['ses:Send*'],
        resources=['*'],
    ))
    # Pause the execution until SendTaskSuccess/Failure is called with the
    # task token embedded in the payload.
    # NOTE(review): with WAIT_FOR_TASK_TOKEN a 5-minute heartbeat requires
    # SendTaskHeartbeat at least that often while waiting — confirm one of
    # the Lambdas does this, else the task times out before the 10-minute
    # approval window elapses.
    submit_job = tasks.LambdaInvoke(
        scope=self,
        id=f'{constants.PREFIX}-submit-job',
        lambda_function=submit_job_lambda,
        integration_pattern=sfn.IntegrationPattern.WAIT_FOR_TASK_TOKEN,
        heartbeat=core.Duration.minutes(5),
        payload=sfn.TaskInput.from_object({
            "token": sfn.Context.task_token,
            "data": sfn.Data.string_at('$'),
        }),
    )
    success = sfn.Succeed(scope=self,
                          id=f'{constants.PREFIX}-success',
                          comment='We did it!')
    fail = sfn.Fail(scope=self,
                    id=f'{constants.PREFIX}-fail',
                    error='WorkflowFailure',
                    cause='Something went wrong')
    # Route on the status string the approval Lambda reports back.
    choice = sfn.Choice(scope=self,
                        id=f'{constants.PREFIX}-choice',
                        comment='Was it approved?')
    choice.when(condition=sfn.Condition.string_equals("$.status", "OK"),
                next=success)
    choice.otherwise(fail)
    definition = submit_job.next(choice)
    self._state_machine = sfn.StateMachine(
        scope=self,
        id=f'{constants.PREFIX}-state-machine',
        definition=definition,
        # only 10 mins to approve better be quick
        timeout=core.Duration.minutes(10))
    #################################################
    # Lambda invoked by the approval endpoint; reports the decision back
    # to Step Functions via the task token.
    approval_lambda = _lambda.Function(
        scope=self,
        id=f'{constants.PREFIX}-approval-lambda',
        runtime=_lambda.Runtime.PYTHON_3_8,
        handler='approve.handler',
        code=_lambda.Code.from_asset(
            os.path.join('lambdas', 'approve-lambda')),
    )
    # Needed for SendTaskSuccess / SendTaskFailure / SendTaskHeartbeat.
    approval_lambda.add_to_role_policy(statement=iam.PolicyStatement(
        actions=['states:Send*'],
        resources=['*']))
    approve_integration = apigateway.LambdaIntegration(approval_lambda)
    approve_api_get_method = approve_api.add_method(
        http_method="GET",
        api_key_required=False,
        integration=approve_integration,
    )
def __init__(self, app: App, id: str, **kwargs) -> None:
    """Submit/poll demo workflow backed by two Lambdas: submit a job, wait
    30 seconds, fetch the status, and branch until a terminal state."""
    super().__init__(app, id, **kwargs)

    # Lambda Handlers Definitions
    submit_fn = _lambda.Function(
        self, 'submitLambda',
        handler='lambda_function.lambda_handler',
        runtime=_lambda.Runtime.PYTHON_3_9,
        code=_lambda.Code.from_asset('lambdas/submit'))
    status_fn = _lambda.Function(
        self, 'statusLambda',
        handler='lambda_function.lambda_handler',
        runtime=_lambda.Runtime.PYTHON_3_9,
        code=_lambda.Code.from_asset('lambdas/status'))

    # Step functions Definition
    submit_step = _aws_stepfunctions_tasks.LambdaInvoke(
        self, "Submit Job",
        lambda_function=submit_fn,
        output_path="$.Payload",
    )
    pause_step = _aws_stepfunctions.Wait(
        self, "Wait 30 Seconds",
        time=_aws_stepfunctions.WaitTime.duration(Duration.seconds(30)))
    status_step = _aws_stepfunctions_tasks.LambdaInvoke(
        self, "Get Status",
        lambda_function=status_fn,
        output_path="$.Payload",
    )
    failed_step = _aws_stepfunctions.Fail(
        self, "Fail",
        cause='AWS Batch Job Failed',
        error='DescribeJob returned FAILED')
    succeeded_step = _aws_stepfunctions.Succeed(
        self, "Succeeded",
        comment='AWS Batch Job succeeded')

    # Poll loop: FAILED -> Fail, SUCCEEDED -> Succeed, otherwise wait again.
    completion_gate = _aws_stepfunctions.Choice(self, 'Job Complete?')
    completion_gate.when(
        _aws_stepfunctions.Condition.string_equals('$.status', 'FAILED'),
        failed_step)
    completion_gate.when(
        _aws_stepfunctions.Condition.string_equals('$.status', 'SUCCEEDED'),
        succeeded_step)
    completion_gate.otherwise(pause_step)

    # Create Chain
    chain = submit_step.next(pause_step) \
        .next(status_step) \
        .next(completion_gate)

    # Create state machine
    _aws_stepfunctions.StateMachine(
        self, "StateMachine",
        definition=chain,
        timeout=Duration.minutes(5),
    )
def __init__(self, scope: core.Construct, id: str, group_name: str,
             minute_duration: int, **kwargs) -> None:
    """Watch CloudTrail for EC2 RunInstances calls and, for users in
    ``group_name``, terminate the launched instances after
    ``minute_duration`` minutes via a Step Functions workflow.

    Also raises a CloudWatch alarm when the workflow fails.
    """
    super().__init__(scope, id, **kwargs)
    # TODO: Setup alerting of failure to an SNS
    # TODO: Failure is not the same as a student not in a group
    # TODO: Streamline input data so that lambda's only get the info they really need
    # TODO: Comment
    # TODO: Need to separate unexpected errors from regular errors

    # Read the inline Lambda sources with context managers so the file
    # handles are closed (the previous open(...).read() calls leaked them).
    with open("./resources/schedule-termination.py", 'r') as source_file:
        schedule_stop_source = source_file.read()
    with open("./resources/terminate-ec2.py", 'r') as source_file:
        terminate_ec2_source = source_file.read()

    # Setting up monitoring
    # Decides whether the launching user is in the watched group and the
    # instance should therefore be scheduled for termination.
    schedule_stop = lambda_.Function(
        self,
        id="ScheduleStopLambda",
        runtime=lambda_.Runtime.PYTHON_3_7,
        code=lambda_.Code.from_inline(schedule_stop_source),
        handler="index.handler",
        log_retention=logs.RetentionDays.ONE_DAY,
        environment=dict(GROUP_NAME=group_name),
        timeout=core.Duration.seconds(30))
    schedule_stop.add_to_role_policy(
        statement=iam.PolicyStatement(actions=[
            "ec2:Describe*", "iam:ListGroupsForUser", "iam:ListUsers"
        ],
                                      effect=iam.Effect.ALLOW,
                                      resources=["*"]))

    # Performs the actual instance termination.
    terminate_ec2 = lambda_.Function(
        self,
        id="TerminateEC2",
        runtime=lambda_.Runtime.PYTHON_3_7,
        code=lambda_.Code.from_inline(terminate_ec2_source),
        handler="index.handler",
        log_retention=logs.RetentionDays.ONE_DAY,
        timeout=core.Duration.seconds(30))
    terminate_ec2.add_to_role_policy(
        statement=iam.PolicyStatement(actions=[
            "ec2:DescribeInstance*",
            "ec2:TerminateInstances",
        ],
                                      effect=iam.Effect.ALLOW,
                                      resources=["*"]))

    # The lambda object that will see if we should schedule.
    schedule_stop_task = tasks.LambdaInvoke(
        self,
        id='schedule stop',
        lambda_function=schedule_stop,
        input_path="$.detail.userIdentity",
        result_path="$.Payload",
    )

    # TODO: Need to change this based on the configuration info above
    # Wait state to try and delete
    # wait_x = sfn.Wait(self, 'Wait x minutes', time=sfn.WaitTime.seconds_path("10"))
    wait_x = sfn.Wait(self,
                      id='Wait x minutes',
                      time=sfn.WaitTime.duration(
                          core.Duration.minutes(minute_duration)))
    job_failed = sfn.Fail(self,
                          id="Failed Job",
                          cause="Error in the input",
                          error="Error")
    job_finished = sfn.Succeed(self, id="Job Finished")

    # False payload => user not in the watched group; nothing to terminate.
    choice = sfn.Choice(self, 'Can I delete')
    choice.when(sfn.Condition.boolean_equals('$.Payload.Payload', False),
                job_finished)
    choice.otherwise(wait_x)

    terminate_ec2_task = tasks.LambdaInvoke(
        self,
        'terminate',
        lambda_function=terminate_ec2,
        input_path="$.detail.responseElements.instancesSet")

    wait_x.next(terminate_ec2_task).next(job_finished)
    state_definition = schedule_stop_task \
        .next(choice)
    terminate_machine = sfn.StateMachine(self,
                                         id="State Machine",
                                         definition=state_definition)

    # Alarm on any failed execution of the termination workflow.
    cloudwatch.Alarm(self,
                     "EC2ScheduleAlarm",
                     metric=terminate_machine.metric_failed(),
                     threshold=1,
                     evaluation_periods=1)
    # TODO Build Rule that monitors for EC2 creation
    # Any new creation, the EC2 will have to be destroyed. Including
    # other things?
    create_event = events.Rule(
        self,
        id='detect-ec2-start',
        description="Detects if an EC2 is created",
        enabled=True,
        event_pattern=events.EventPattern(
            detail_type=["AWS API Call via CloudTrail"],
            source=["aws.ec2"],
            detail={
                "eventName": ["RunInstances"],
                "eventSource": ["ec2.amazonaws.com"]
            }),
        targets=[targets.SfnStateMachine(terminate_machine)])
def __init__(
    self,
    scope: Construct,
    stack_id: str,
    *,
    botocore_lambda_layer: aws_lambda_python.PythonLayerVersion,
    env_name: str,
    storage_bucket: aws_s3.Bucket,
    validation_results_table: Table,
) -> None:
    """Dataset-version-creation processing stack.

    Wires up the processing-assets table, a Batch job queue, the
    update-catalog SQS queue, the Lambda/Batch tasks, and the Step
    Functions state machine that validates STAC metadata and checksums,
    imports the dataset, and updates the catalog.  Exposes
    ``self.message_queue``, ``self.state_machine`` and their SSM
    parameters.
    """
    # pylint: disable=too-many-locals, too-many-statements
    super().__init__(scope, stack_id)

    ############################################################################################
    # PROCESSING ASSETS TABLE
    processing_assets_table = Table(
        self,
        f"{env_name}-processing-assets",
        env_name=env_name,
        parameter_name=ParameterName.PROCESSING_ASSETS_TABLE_NAME,
        sort_key=aws_dynamodb.Attribute(name="sk", type=aws_dynamodb.AttributeType.STRING),
    )

    ############################################################################################
    # BATCH JOB DEPENDENCIES
    batch_job_queue = BatchJobQueue(
        self,
        "batch-job-queue",
        env_name=env_name,
        processing_assets_table=processing_assets_table,
    ).job_queue

    s3_read_only_access_policy = aws_iam.ManagedPolicy.from_aws_managed_policy_name(
        "AmazonS3ReadOnlyAccess"
    )

    ############################################################################################
    # UPDATE CATALOG UPDATE MESSAGE QUEUE
    dead_letter_queue = aws_sqs.Queue(
        self,
        "dead-letter-queue",
        visibility_timeout=LAMBDA_TIMEOUT,
    )

    # Messages that fail three deliveries land in the dead-letter queue.
    self.message_queue = aws_sqs.Queue(
        self,
        "update-catalog-message-queue",
        visibility_timeout=LAMBDA_TIMEOUT,
        dead_letter_queue=aws_sqs.DeadLetterQueue(max_receive_count=3, queue=dead_letter_queue),
    )
    self.message_queue_name_parameter = aws_ssm.StringParameter(
        self,
        "update-catalog-message-queue-name",
        string_value=self.message_queue.queue_name,
        description=f"Update Catalog Message Queue Name for {env_name}",
        parameter_name=ParameterName.UPDATE_CATALOG_MESSAGE_QUEUE_NAME.value,
    )

    # Consumes catalog-update messages one at a time.
    populate_catalog_lambda = BundledLambdaFunction(
        self,
        "populate-catalog-bundled-lambda-function",
        directory="populate_catalog",
        extra_environment={ENV_NAME_VARIABLE_NAME: env_name},
        botocore_lambda_layer=botocore_lambda_layer,
    )

    self.message_queue.grant_consume_messages(populate_catalog_lambda)
    populate_catalog_lambda.add_event_source(
        SqsEventSource(self.message_queue, batch_size=1)  # type: ignore[arg-type]
    )

    ############################################################################################
    # STATE MACHINE TASKS
    # Validates the STAC metadata; needs read access to source objects and
    # read/write access to both tracking tables.
    check_stac_metadata_task = LambdaTask(
        self,
        "check-stac-metadata-task",
        directory="check_stac_metadata",
        botocore_lambda_layer=botocore_lambda_layer,
        extra_environment={ENV_NAME_VARIABLE_NAME: env_name},
    )
    assert check_stac_metadata_task.lambda_function.role
    check_stac_metadata_task.lambda_function.role.add_managed_policy(
        policy=s3_read_only_access_policy
    )

    for table in [processing_assets_table, validation_results_table]:
        table.grant_read_write_data(check_stac_metadata_task.lambda_function)
        table.grant(
            check_stac_metadata_task.lambda_function,
            "dynamodb:DescribeTable",
        )

    # Pages through the asset list; its result drives the iteration loop.
    content_iterator_task = LambdaTask(
        self,
        "content-iterator-task",
        directory="content_iterator",
        botocore_lambda_layer=botocore_lambda_layer,
        result_path=f"$.{CONTENT_KEY}",
        extra_environment={ENV_NAME_VARIABLE_NAME: env_name},
    )

    check_files_checksums_directory = "check_files_checksums"
    # Payload forwarded to the Batch jobs; '.$' keys are resolved from the
    # execution state by Step Functions.
    check_files_checksums_default_payload_object = {
        f"{DATASET_ID_KEY}.$": f"$.{DATASET_ID_KEY}",
        f"{VERSION_ID_KEY}.$": f"$.{VERSION_ID_KEY}",
        f"{METADATA_URL_KEY}.$": f"$.{METADATA_URL_KEY}",
        f"{FIRST_ITEM_KEY}.$": f"$.{CONTENT_KEY}.{FIRST_ITEM_KEY}",
        f"{ASSETS_TABLE_NAME_KEY}.$": f"$.{CONTENT_KEY}.{ASSETS_TABLE_NAME_KEY}",
        f"{RESULTS_TABLE_NAME_KEY}.$": f"$.{CONTENT_KEY}.{RESULTS_TABLE_NAME_KEY}",
    }
    # Single-job variant, used when exactly one item remains.
    check_files_checksums_single_task = BatchSubmitJobTask(
        self,
        "check-files-checksums-single-task",
        env_name=env_name,
        directory=check_files_checksums_directory,
        s3_policy=s3_read_only_access_policy,
        job_queue=batch_job_queue,
        payload_object=check_files_checksums_default_payload_object,
        container_overrides_command=[
            "--dataset-id",
            f"Ref::{DATASET_ID_KEY}",
            "--version-id",
            f"Ref::{VERSION_ID_KEY}",
            "--first-item",
            f"Ref::{FIRST_ITEM_KEY}",
            "--assets-table-name",
            f"Ref::{ASSETS_TABLE_NAME_KEY}",
            "--results-table-name",
            f"Ref::{RESULTS_TABLE_NAME_KEY}",
        ],
    )
    # NOTE(review): int() around JsonPath.number_at yields a numeric token
    # placeholder resolved from the execution state at run time — confirm
    # this resolves as intended on the pinned CDK version.
    array_size = int(
        aws_stepfunctions.JsonPath.number_at(f"$.{CONTENT_KEY}.{ITERATION_SIZE_KEY}")
    )
    # Array-job variant, used when multiple items remain.
    check_files_checksums_array_task = BatchSubmitJobTask(
        self,
        "check-files-checksums-array-task",
        env_name=env_name,
        directory=check_files_checksums_directory,
        s3_policy=s3_read_only_access_policy,
        job_queue=batch_job_queue,
        payload_object=check_files_checksums_default_payload_object,
        container_overrides_command=[
            "--dataset-id",
            f"Ref::{DATASET_ID_KEY}",
            "--version-id",
            f"Ref::{VERSION_ID_KEY}",
            "--first-item",
            f"Ref::{FIRST_ITEM_KEY}",
            "--assets-table-name",
            f"Ref::{ASSETS_TABLE_NAME_KEY}",
            "--results-table-name",
            f"Ref::{RESULTS_TABLE_NAME_KEY}",
        ],
        array_size=array_size,
    )

    for reader in [
        content_iterator_task.lambda_function,
        check_files_checksums_single_task.job_role,
        check_files_checksums_array_task.job_role,
    ]:
        processing_assets_table.grant_read_data(reader)  # type: ignore[arg-type]
        processing_assets_table.grant(
            reader, "dynamodb:DescribeTable"  # type: ignore[arg-type]
        )

    for writer in [
        check_files_checksums_single_task.job_role,
        check_files_checksums_array_task.job_role,
    ]:
        validation_results_table.grant_read_write_data(writer)  # type: ignore[arg-type]
        validation_results_table.grant(
            writer, "dynamodb:DescribeTable"  # type: ignore[arg-type]
        )

    # Aggregates the per-file validation results into $.validation.
    validation_summary_task = LambdaTask(
        self,
        "validation-summary-task",
        directory="validation_summary",
        botocore_lambda_layer=botocore_lambda_layer,
        result_path=f"$.{VALIDATION_KEY}",
        extra_environment={ENV_NAME_VARIABLE_NAME: env_name},
    )
    validation_results_table.grant_read_data(validation_summary_task.lambda_function)
    validation_results_table.grant(
        validation_summary_task.lambda_function, "dynamodb:DescribeTable"
    )

    # Role assumed by S3 Batch Operations for the import jobs.
    import_dataset_role = aws_iam.Role(
        self,
        "import-dataset",
        assumed_by=aws_iam.ServicePrincipal(  # type: ignore[arg-type]
            "batchoperations.s3.amazonaws.com"
        ),
    )

    import_asset_file_function = ImportFileFunction(
        self,
        directory="import_asset_file",
        invoker=import_dataset_role,
        env_name=env_name,
        botocore_lambda_layer=botocore_lambda_layer,
    )
    import_metadata_file_function = ImportFileFunction(
        self,
        directory="import_metadata_file",
        invoker=import_dataset_role,
        env_name=env_name,
        botocore_lambda_layer=botocore_lambda_layer,
    )

    # Kicks off the S3 Batch Operations import jobs.
    import_dataset_task = LambdaTask(
        self,
        "import-dataset-task",
        directory="import_dataset",
        botocore_lambda_layer=botocore_lambda_layer,
        result_path=f"$.{IMPORT_DATASET_KEY}",
        extra_environment={ENV_NAME_VARIABLE_NAME: env_name},
    )
    # Must be able to hand the import role to S3 Batch Operations.
    import_dataset_task.lambda_function.add_to_role_policy(
        aws_iam.PolicyStatement(
            resources=[import_dataset_role.role_arn],
            actions=["iam:PassRole"],
        ),
    )
    import_dataset_task.lambda_function.add_to_role_policy(
        aws_iam.PolicyStatement(resources=["*"], actions=["s3:CreateJob"])
    )

    for table in [processing_assets_table]:
        table.grant_read_data(import_dataset_task.lambda_function)
        table.grant(import_dataset_task.lambda_function, "dynamodb:DescribeTable")

    # Import status check
    wait_before_upload_status_check = Wait(
        self,
        "wait-before-upload-status-check",
        time=WaitTime.duration(Duration.seconds(10)),
    )
    upload_status_task = LambdaTask(
        self,
        "upload-status",
        directory="upload_status",
        botocore_lambda_layer=botocore_lambda_layer,
        result_path="$.upload_status",
        extra_environment={ENV_NAME_VARIABLE_NAME: env_name},
    )
    validation_results_table.grant_read_data(upload_status_task.lambda_function)
    validation_results_table.grant(upload_status_task.lambda_function, "dynamodb:DescribeTable")

    upload_status_task.lambda_function.add_to_role_policy(ALLOW_DESCRIBE_ANY_S3_JOB)

    # Parameters
    import_asset_file_function_arn_parameter = aws_ssm.StringParameter(
        self,
        "import asset file function arn",
        string_value=import_asset_file_function.function_arn,
        description=f"Import asset file function ARN for {env_name}",
        parameter_name=ParameterName.PROCESSING_IMPORT_ASSET_FILE_FUNCTION_TASK_ARN.value,
    )
    import_metadata_file_function_arn_parameter = aws_ssm.StringParameter(
        self,
        "import metadata file function arn",
        string_value=import_metadata_file_function.function_arn,
        description=f"Import metadata file function ARN for {env_name}",
        parameter_name=ParameterName.PROCESSING_IMPORT_METADATA_FILE_FUNCTION_TASK_ARN.value,
    )

    import_dataset_role_arn_parameter = aws_ssm.StringParameter(
        self,
        "import dataset role arn",
        string_value=import_dataset_role.role_arn,
        description=f"Import dataset role ARN for {env_name}",
        parameter_name=ParameterName.PROCESSING_IMPORT_DATASET_ROLE_ARN.value,
    )

    # Sends the final catalog-update message onto the SQS queue.
    update_dataset_catalog = LambdaTask(
        self,
        "update-dataset-catalog",
        directory="update_dataset_catalog",
        botocore_lambda_layer=botocore_lambda_layer,
        extra_environment={ENV_NAME_VARIABLE_NAME: env_name},
    )
    self.message_queue.grant_send_messages(update_dataset_catalog.lambda_function)

    for storage_writer in [
        import_dataset_role,
        import_dataset_task.lambda_function,
        import_asset_file_function,
        import_metadata_file_function,
        populate_catalog_lambda,
        update_dataset_catalog.lambda_function,
    ]:
        storage_bucket.grant_read_write(storage_writer)  # type: ignore[arg-type]

    grant_parameter_read_access(
        {
            import_asset_file_function_arn_parameter: [import_dataset_task.lambda_function],
            import_dataset_role_arn_parameter: [import_dataset_task.lambda_function],
            import_metadata_file_function_arn_parameter: [import_dataset_task.lambda_function],
            processing_assets_table.name_parameter: [
                check_stac_metadata_task.lambda_function,
                content_iterator_task.lambda_function,
                import_dataset_task.lambda_function,
            ],
            validation_results_table.name_parameter: [
                check_stac_metadata_task.lambda_function,
                content_iterator_task.lambda_function,
                validation_summary_task.lambda_function,
                upload_status_task.lambda_function,
            ],
            self.message_queue_name_parameter: [update_dataset_catalog.lambda_function],
        }
    )

    success_task = aws_stepfunctions.Succeed(self, "success")
    upload_failure = aws_stepfunctions.Fail(self, "upload failure")
    # NOTE(review): a Succeed state named "validation failure" —
    # presumably deliberate so failed validations end the execution
    # cleanly (results live in the validation table); confirm.
    validation_failure = aws_stepfunctions.Succeed(self, "validation failure")

    ############################################################################################
    # STATE MACHINE
    # validate metadata -> iterate content -> checksum (single or array
    # Batch job) -> loop until the iterator reports -1 -> summarise
    # validation -> import, poll upload status, then update the catalog.
    dataset_version_creation_definition = (
        check_stac_metadata_task.next(content_iterator_task)
        .next(
            aws_stepfunctions.Choice(  # type: ignore[arg-type]
                self, "check_files_checksums_maybe_array"
            )
            .when(
                aws_stepfunctions.Condition.number_equals(
                    f"$.{CONTENT_KEY}.{ITERATION_SIZE_KEY}", 1
                ),
                check_files_checksums_single_task.batch_submit_job,
            )
            .otherwise(check_files_checksums_array_task.batch_submit_job)
            .afterwards()
        )
        .next(
            aws_stepfunctions.Choice(self, "content_iteration_finished")
            .when(
                aws_stepfunctions.Condition.number_equals(
                    f"$.{CONTENT_KEY}.{NEXT_ITEM_KEY}", -1
                ),
                validation_summary_task.next(
                    aws_stepfunctions.Choice(  # type: ignore[arg-type]
                        self, "validation_successful"
                    )
                    .when(
                        aws_stepfunctions.Condition.boolean_equals(
                            f"$.{VALIDATION_KEY}.{SUCCESS_KEY}", True
                        ),
                        import_dataset_task.next(
                            wait_before_upload_status_check  # type: ignore[arg-type]
                        )
                        .next(upload_status_task)
                        .next(
                            aws_stepfunctions.Choice(
                                self, "import_completed"  # type: ignore[arg-type]
                            )
                            .when(
                                aws_stepfunctions.Condition.and_(
                                    aws_stepfunctions.Condition.string_equals(
                                        f"$.upload_status.{ASSET_UPLOAD_KEY}.status", "Complete"
                                    ),
                                    aws_stepfunctions.Condition.string_equals(
                                        f"$.upload_status.{METADATA_UPLOAD_KEY}.status",
                                        "Complete",
                                    ),
                                ),
                                update_dataset_catalog.next(
                                    success_task  # type: ignore[arg-type]
                                ),
                            )
                            .when(
                                aws_stepfunctions.Condition.or_(
                                    aws_stepfunctions.Condition.string_equals(
                                        f"$.upload_status.{ASSET_UPLOAD_KEY}.status",
                                        "Cancelled",
                                    ),
                                    aws_stepfunctions.Condition.string_equals(
                                        f"$.upload_status.{ASSET_UPLOAD_KEY}.status", "Failed"
                                    ),
                                    aws_stepfunctions.Condition.string_equals(
                                        f"$.upload_status.{METADATA_UPLOAD_KEY}.status",
                                        "Cancelled",
                                    ),
                                    aws_stepfunctions.Condition.string_equals(
                                        f"$.upload_status.{METADATA_UPLOAD_KEY}.status",
                                        "Failed",
                                    ),
                                ),
                                upload_failure,  # type: ignore[arg-type]
                            )
                            .otherwise(
                                wait_before_upload_status_check  # type: ignore[arg-type]
                            )
                        ),
                    )
                    .otherwise(validation_failure)  # type: ignore[arg-type]
                ),
            )
            .otherwise(content_iterator_task)
        )
    )

    self.state_machine = aws_stepfunctions.StateMachine(
        self,
        f"{env_name}-dataset-version-creation",
        definition=dataset_version_creation_definition,  # type: ignore[arg-type]
    )

    self.state_machine_parameter = aws_ssm.StringParameter(
        self,
        "state machine arn",
        description=f"State machine ARN for {env_name}",
        parameter_name=ParameterName.PROCESSING_DATASET_VERSION_CREATION_STEP_FUNCTION_ARN.value,  # pylint:disable=line-too-long
        string_value=self.state_machine.state_machine_arn,
    )

    Tags.of(self).add("ApplicationLayer", "processing")  # type: ignore[arg-type]
def __init__(self, scope: core.Construct, id: str, redshift_cluster_name: str, user_secret: Secret) -> None:
    """Provision the DWH loader.

    Builds a Lambda that drives the Redshift ETL stored procedure through the
    Redshift Data API, and a Step Functions state machine (Submit -> Wait ->
    Status -> Choice poll loop) that runs it on a 30-minute schedule.

    :param scope: parent construct.
    :param id: construct id.
    :param redshift_cluster_name: name of the target Redshift cluster.
    :param user_secret: Secrets Manager secret with the ETL user's credentials.
    """
    super().__init__(scope, id)
    # NOTE: removed unused local `stack = Stack.of(self)` — the value was
    # never referenced and Stack.of() has no side effects.

    # Vendor the Lambda's Python dependencies into the layer directory at
    # synth time (manylinux wheels so they run on the Lambda Linux runtime).
    subprocess.call(
        ['pip', 'install',
         '-t', 'dwh/dwh_loader_layer/python/lib/python3.8/site-packages',
         '-r', 'dwh/dwh_loader/requirements.txt',
         '--platform', 'manylinux1_x86_64', '--only-binary=:all:', '--upgrade'])

    requirements_layer = _lambda.LayerVersion(
        scope=self,
        id='PythonRequirementsTemplate',
        code=_lambda.Code.from_asset('dwh/dwh_loader_layer'),
        compatible_runtimes=[_lambda.Runtime.PYTHON_3_8])

    # Execution role: basic logging + Redshift Data API + secret read.
    dwh_loader_role = _iam.Role(
        self, 'Role',
        assumed_by=_iam.ServicePrincipal('lambda.amazonaws.com')
    )

    dwh_loader_role.add_managed_policy(_iam.ManagedPolicy.from_aws_managed_policy_name(
        'service-role/AWSLambdaBasicExecutionRole'
    ))

    dwh_loader_role.attach_inline_policy(
        _iam.Policy(
            self, 'InlinePolicy',
            statements=[
                # Redshift Data API calls are not resource-scoped by AWS.
                _iam.PolicyStatement(
                    actions=[
                        "redshift-data:ExecuteStatement",
                        "redshift-data:CancelStatement",
                        "redshift-data:ListStatements",
                        "redshift-data:GetStatementResult",
                        "redshift-data:DescribeStatement",
                        "redshift-data:ListDatabases",
                        "redshift-data:ListSchemas",
                        "redshift-data:ListTables",
                        "redshift-data:DescribeTable"
                    ],
                    resources=['*']
                ),
                _iam.PolicyStatement(
                    actions=["secretsmanager:GetSecretValue"],
                    resources=[user_secret.secret_arn]
                ),
                # Temporary cluster credentials for the ETL user only.
                _iam.PolicyStatement(
                    actions=["redshift:GetClusterCredentials"],
                    resources=[
                        "arn:aws:redshift:*:*:dbname:*/*",
                        "arn:aws:redshift:*:*:dbuser:*/" + _config.Redshift.ETL_USER
                    ]
                ),
                # Explicitly forbid auto-creating the ETL database user.
                # (Was `_iam.Effect('DENY')`; the enum member is the idiomatic form.)
                _iam.PolicyStatement(
                    effect=_iam.Effect.DENY,
                    actions=["redshift:CreateClusterUser"],
                    resources=["arn:aws:redshift:*:*:dbuser:*/" + _config.Redshift.ETL_USER]
                ),
                _iam.PolicyStatement(
                    conditions={
                        'StringLike': {
                            "iam:AWSServiceName": "redshift-data.amazonaws.com"
                        }
                    },
                    actions=["iam:CreateServiceLinkedRole"],
                    resources=["arn:aws:iam::*:role/aws-service-role/redshift-data.amazonaws.com/AWSServiceRoleForRedshift"]
                ),
            ]
        )
    )

    dwh_loader_function = _lambda.Function(
        self, 'Lambda',
        runtime=_lambda.Runtime.PYTHON_3_8,
        code=_lambda.Code.from_asset('dwh/dwh_loader'),
        handler='dwh_loader.handler',
        function_name='dwh-loader',
        environment={
            'CLUSTER_NAME': redshift_cluster_name,
            'PROCEDURE': _config.Redshift.ETL_PROCEDURE,
            'SECRET_ARN': user_secret.secret_arn,
            'DATABASE': _config.Redshift.DATABASE,
            'REGION': core.Aws.REGION,
            'SCHEMA': _config.Redshift.SCHEMA
        },
        layers=[requirements_layer],
        timeout=core.Duration.seconds(30),
        role=dwh_loader_role
    )

    # Same Lambda is used both to submit the statement and to poll its status
    # (the handler presumably branches on the input — TODO confirm).
    dwh_loader_submit = _sfn_tasks.LambdaInvoke(
        self, 'Submit',
        lambda_function=dwh_loader_function,
        payload_response_only=True
    )

    dwh_loader_wait = _sfn.Wait(
        self, 'Wait',
        time=_sfn.WaitTime.duration(core.Duration.seconds(30))
    )

    dwh_loader_complete = _sfn.Choice(
        self, 'Complete'
    )

    # NOTE(review): `error` here is a static string, not a JSONPath lookup —
    # the Fail state will report the literal text "$.Result.Error".
    dwh_loader_failed = _sfn.Fail(
        self, 'Fail',
        cause="Redshift Data API statement failed",
        error="$.Result.Error"
    )

    dwh_loader_status = _sfn_tasks.LambdaInvoke(
        self, 'Status',
        lambda_function=dwh_loader_function,
        result_path='$.Result',
        payload_response_only=True
    )

    # Poll loop: keep waiting until the statement is FINISHED or FAILED.
    definition = dwh_loader_submit \
        .next(dwh_loader_wait) \
        .next(dwh_loader_status) \
        .next(dwh_loader_complete
              .when(_sfn.Condition.string_equals('$.Result.Status', 'FAILED'), dwh_loader_failed)
              .when(_sfn.Condition.string_equals('$.Result.Status', 'FINISHED'),
                    _sfn.Succeed(self, 'DwhLoaderSuccess'))
              .otherwise(dwh_loader_wait))

    dwh_loader_stepfunctions = _sfn.StateMachine(
        self, 'StepFunctions',
        definition=definition,
        timeout=core.Duration.minutes(30)
    )

    # Kick the loader every 30 minutes.
    step_trigger = _events.Rule(
        self, 'StepTrigger',
        schedule=_events.Schedule.cron(minute='0/30',
                                       hour='*',
                                       month='*',
                                       week_day='*',
                                       year='*')
    )

    step_trigger.add_target(
        _events_targets.SfnStateMachine(
            machine=dwh_loader_stepfunctions,
        )
    )
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    """Pizza-ordering demo stack.

    An Express Step Functions workflow checks whether an order asks for
    pineapple (via a Node.js Lambda) and fails the order if so; an HTTP API
    invokes the workflow synchronously through an AWS_PROXY integration.
    """
    super().__init__(scope, id, **kwargs)

    # Lambda that inspects the order payload for pineapple.
    order_checker_fn = _lambda.Function(
        self, "pineappleCheckLambdaHandler",
        runtime=_lambda.Runtime.NODEJS_12_X,
        handler="orderPizza.handler",
        code=_lambda.Code.from_asset("lambda_fns"),
    )

    # First workflow step: run the pineapple check on the order's flavour.
    check_order_task = step_fn_tasks.LambdaInvoke(
        self, 'Order Pizza Job',
        lambda_function=order_checker_fn,
        input_path='$.flavour',
        result_path='$.pineappleAnalysis',
        payload_response_only=True)

    # Terminal states: refuse pineapple orders, bake everything else.
    refuse_order = step_fn.Fail(
        self, 'Sorry, We Dont add Pineapple',
        cause='They asked for Pineapple',
        error='Failed To Make Pizza')
    make_pizza = step_fn.Succeed(
        self, 'Lets make your pizza',
        output_path='$.pineappleAnalysis')

    # Branch on the analysis result produced by the Lambda.
    pineapple_choice = step_fn.Choice(self, 'With Pineapple?')
    pineapple_choice.when(
        step_fn.Condition.boolean_equals('$.pineappleAnalysis.containsPineapple', True),
        refuse_order)
    pineapple_choice.otherwise(make_pizza)

    workflow_definition = check_order_task.next(pineapple_choice)

    # EXPRESS type so the HTTP API can call StartSyncExecution.
    pizza_state_machine = step_fn.StateMachine(
        self, 'StateMachine',
        definition=workflow_definition,
        timeout=core.Duration.minutes(5),
        tracing_enabled=True,
        state_machine_type=step_fn.StateMachineType.EXPRESS)

    # Role API Gateway assumes to start synchronous executions.
    gateway_role = iam.Role(
        self, 'HttpApiRole',
        assumed_by=iam.ServicePrincipal('apigateway.amazonaws.com'),
        inline_policies={
            "AllowSFNExec": iam.PolicyDocument(statements=[
                iam.PolicyStatement(
                    actions=["states:StartSyncExecution"],
                    effect=iam.Effect.ALLOW,
                    resources=[pizza_state_machine.state_machine_arn])
            ])
        })

    http_api = api_gw.HttpApi(
        self, 'the_state_machine_api',
        create_default_stage=True)

    # AWS_PROXY integration: request body becomes the execution input.
    sfn_integration = api_gw.CfnIntegration(
        self, 'Integ',
        api_id=http_api.http_api_id,
        integration_type='AWS_PROXY',
        connection_type='INTERNET',
        integration_subtype='StepFunctions-StartSyncExecution',
        credentials_arn=gateway_role.role_arn,
        request_parameters={
            "Input": "$request.body",
            "StateMachineArn": pizza_state_machine.state_machine_arn
        },
        payload_format_version="1.0",
        timeout_in_millis=10000)

    api_gw.CfnRoute(
        self, 'DefaultRoute',
        api_id=http_api.http_api_id,
        route_key=api_gw.HttpRouteKey.DEFAULT.key,
        target=f"integrations/{sfn_integration.ref}")

    core.CfnOutput(self, 'HTTP API URL', value=http_api.url)
def __init__(self, scope: core.Construct, id: str, log_bucket: _s3.Bucket,
             config_table: _dynamodb.Table, tshirt_size: str,
             sink_bucket: _s3.Bucket, vpc: _ec2.Vpc, **kwargs) -> None:
    """Batch data-generator construct.

    Every 30 minutes a Step Functions state machine configures a datagen run
    (Lambda), launches a transient spot-first EMR cluster, runs the dsdgen
    step sized by ``tshirt_size``, bumps a DynamoDB iterator, and terminates
    the cluster.  Custom resources start the state machine on deploy and tear
    down clusters/executions on stack deletion.

    :param scope: parent construct.
    :param id: construct id.
    :param log_bucket: bucket receiving EMR logs.
    :param config_table: DynamoDB table holding datagen config and iterator.
    :param tshirt_size: sizing key into the ``DataGenConfig`` lookup tables.
    :param sink_bucket: destination bucket for generated data.
    :param vpc: VPC whose subnets host the EMR cluster.
    """
    super().__init__(scope, id, **kwargs)

    # EMR control-plane role and the EC2 role assumed by cluster nodes.
    service_role = _iam.Role(
        self, 'BatchEmrServiceRole',
        assumed_by=_iam.ServicePrincipal('elasticmapreduce.amazonaws.com')
    )
    service_role.add_managed_policy(_iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AmazonElasticMapReduceRole'))

    cluster_role = _iam.Role(
        self, 'BatchEmrClusterRole',
        assumed_by=_iam.ServicePrincipal("ec2.amazonaws.com")
    )

    _iam.Policy(
        self, 'BatchEmrClusterPolicy',
        statements=[
            # Glue Data Catalog access plus metrics/describe permissions the
            # Spark job needs; these APIs are not resource-scoped here.
            _iam.PolicyStatement(
                actions=[
                    "glue:CreateDatabase",
                    "glue:UpdateDatabase",
                    "glue:DeleteDatabase",
                    "glue:GetDatabase",
                    "glue:GetDatabases",
                    "glue:CreateTable",
                    "glue:UpdateTable",
                    "glue:DeleteTable",
                    "glue:GetTable",
                    "glue:GetTables",
                    "glue:GetTableVersions",
                    "glue:CreatePartition",
                    "glue:BatchCreatePartition",
                    "glue:UpdatePartition",
                    "glue:DeletePartition",
                    "glue:BatchDeletePartition",
                    "glue:GetPartition",
                    "glue:GetPartitions",
                    "glue:BatchGetPartition",
                    "glue:CreateUserDefinedFunction",
                    "glue:UpdateUserDefinedFunction",
                    "glue:DeleteUserDefinedFunction",
                    "glue:GetUserDefinedFunction",
                    "glue:GetUserDefinedFunctions",
                    "cloudwatch:PutMetricData",
                    "dynamodb:ListTables",
                    "s3:HeadBucket",
                    "ec2:Describe*",
                ],
                resources=['*']
            ),
            # Read the bootstrap script and datagen JAR from the shared bucket.
            _iam.PolicyStatement(
                actions=['s3:GetObject'],
                resources=[
                    'arn:aws:s3:::' + ARA_BUCKET_NAME + BINARIES + DataGenConfig.DSDGEN_INSTALL_SCRIPT,
                    'arn:aws:s3:::' + ARA_BUCKET_NAME + BINARIES + DataGenConfig.JAR_FILE
                ]
            ),
            # Write EMR logs under the data-generator prefix only.
            _iam.PolicyStatement(
                actions=['s3:PutObject'],
                resources=[log_bucket.bucket_arn + "/data-generator/*"]
            ),
            # Read/write the sink bucket where generated data lands.
            _iam.PolicyStatement(
                actions=[
                    "s3:AbortMultipartUpload",
                    "s3:CreateBucket",
                    "s3:DeleteObject",
                    "s3:GetBucketVersioning",
                    "s3:GetObject",
                    "s3:GetObjectTagging",
                    "s3:GetObjectVersion",
                    "s3:ListBucket",
                    "s3:ListBucketMultipartUploads",
                    "s3:ListBucketVersions",
                    "s3:ListMultipartUploadParts",
                    "s3:PutBucketVersioning",
                    "s3:PutObject",
                    "s3:PutObjectTagging"
                ],
                resources=[
                    sink_bucket.bucket_arn + '/*',
                    sink_bucket.bucket_arn
                ]
            )
        ],
        roles=[cluster_role]
    )

    cluster_role.add_managed_policy(_iam.ManagedPolicy.from_aws_managed_policy_name('AmazonSSMManagedInstanceCore'))

    _iam.CfnInstanceProfile(
        self, 'BatchEmrClusterInstanceProfile',
        roles=[cluster_role.role_name],
        instance_profile_name=cluster_role.role_name
    )

    # Security Groups for the EMR cluster (private subnet)
    # https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-man-sec-groups.html#emr-sg-elasticmapreduce-master-private
    master_sg = _ec2.SecurityGroup(self, 'ElasticMapReduce-Master-Private', vpc=vpc)
    slave_sg = _ec2.SecurityGroup(self, 'ElasticMapReduce-Slave-Private', vpc=vpc)
    service_sg = _ec2.SecurityGroup(self, 'ElasticMapReduce-ServiceAccess', vpc=vpc,
                                    allow_all_outbound=False)

    # Service SG used by the proxy instance
    service_sg.add_ingress_rule(master_sg, _ec2.Port.tcp(9443))
    service_sg.add_egress_rule(master_sg, _ec2.Port.tcp(8443))
    service_sg.add_egress_rule(slave_sg, _ec2.Port.tcp(8443))

    # EMR Master: full intra-cluster traffic + service access on 8443.
    master_sg.add_ingress_rule(master_sg, _ec2.Port.all_icmp())
    master_sg.add_ingress_rule(master_sg, _ec2.Port.all_tcp())
    master_sg.add_ingress_rule(master_sg, _ec2.Port.all_udp())
    master_sg.add_ingress_rule(slave_sg, _ec2.Port.all_icmp())
    master_sg.add_ingress_rule(slave_sg, _ec2.Port.all_tcp())
    master_sg.add_ingress_rule(slave_sg, _ec2.Port.all_udp())
    master_sg.add_ingress_rule(service_sg, _ec2.Port.tcp(8443))

    # EMR Slave: mirror of the master rules.
    slave_sg.add_ingress_rule(master_sg, _ec2.Port.all_icmp())
    slave_sg.add_ingress_rule(master_sg, _ec2.Port.all_tcp())
    slave_sg.add_ingress_rule(master_sg, _ec2.Port.all_udp())
    slave_sg.add_ingress_rule(slave_sg, _ec2.Port.all_icmp())
    slave_sg.add_ingress_rule(slave_sg, _ec2.Port.all_tcp())
    slave_sg.add_ingress_rule(slave_sg, _ec2.Port.all_udp())
    slave_sg.add_ingress_rule(service_sg, _ec2.Port.tcp(8443))

    # Lambda source is inlined from the repo at synth time.
    with open('common/common_cdk/lambda/datagen_config.py', 'r') as f:
        lambda_source = f.read()

    configure_datagen_function = _lambda.SingletonFunction(
        self, 'BatchConfigureDatagenLambda',
        uuid="58a9a222-ff07-11ea-adc1-0242ac120002",
        runtime=_lambda.Runtime.PYTHON_3_7,
        code=_lambda.Code.inline(lambda_source),
        handler='index.handler',
        function_name='datagen-config',
        environment={
            'TABLE_NAME': config_table.table_name,
            'JAR_LOCATION': BINARIES_LOCATION + DataGenConfig.JAR_FILE,
        },
        timeout=core.Duration.seconds(10)
    )

    configure_datagen_function.role.add_to_policy(
        _iam.PolicyStatement(
            actions=[
                'dynamodb:GetItem',
                'dynamodb:PutItem',
            ],
            resources=[config_table.table_arn]
        )
    )

    # Normal-path termination at the end of the workflow.
    terminate_cluster = _sfn_tasks.EmrTerminateCluster(
        self, 'BatchDeleteCluster',
        cluster_id=_sfn.TaskInput.from_data_at("$.Emr.Cluster.Id").value,
        integration_pattern=_sfn.IntegrationPattern.RUN_JOB,
    )

    # Error-path termination: clean up the cluster, then fail the execution.
    terminate_cluster_error = _sfn_tasks.EmrTerminateCluster(
        self, 'BatchDeleteClusterError',
        cluster_id=_sfn.TaskInput.from_data_at("$.Emr.Cluster.Id").value,
        integration_pattern=_sfn.IntegrationPattern.RUN_JOB,
    ).next(_sfn.Fail(self, 'StepFailure'))

    create_cluster = _sfn_tasks.EmrCreateCluster(
        self, "BatchCreateEMRCluster",
        name="BatchDatagenCluster",
        result_path="$.Emr",
        release_label='emr-5.30.1',
        log_uri=log_bucket.s3_url_for_object() + "/data-generator",
        cluster_role=cluster_role,
        service_role=service_role,
        bootstrap_actions=[
            _sfn_tasks.EmrCreateCluster.BootstrapActionConfigProperty(
                name="dsdgen-install",
                script_bootstrap_action=_sfn_tasks.EmrCreateCluster.ScriptBootstrapActionConfigProperty(
                    path=BINARIES_LOCATION + DataGenConfig.DSDGEN_INSTALL_SCRIPT,
                )
            )
        ],
        applications=[
            _sfn_tasks.EmrCreateCluster.ApplicationConfigProperty(
                name="spark"
            ),
            _sfn_tasks.EmrCreateCluster.ApplicationConfigProperty(
                name="hadoop"
            )
        ],
        instances=_sfn_tasks.EmrCreateCluster.InstancesConfigProperty(
            emr_managed_master_security_group=master_sg.security_group_id,
            emr_managed_slave_security_group=slave_sg.security_group_id,
            service_access_security_group=service_sg.security_group_id,
            ec2_subnet_ids=vpc.select_subnets().subnet_ids,
            instance_fleets=[
                # Master fleet: one spot instance drawn from several
                # interchangeable types; falls back to on-demand after 5 min.
                _sfn_tasks.EmrCreateCluster.InstanceFleetConfigProperty(
                    instance_fleet_type=_sfn_tasks.EmrCreateCluster.InstanceRoleType.MASTER,
                    instance_type_configs=[
                        _sfn_tasks.EmrCreateCluster.InstanceTypeConfigProperty(
                            instance_type='m5.xlarge',
                            weighted_capacity=1
                        ),
                        _sfn_tasks.EmrCreateCluster.InstanceTypeConfigProperty(
                            instance_type='m5a.xlarge',
                            weighted_capacity=1
                        ),
                        _sfn_tasks.EmrCreateCluster.InstanceTypeConfigProperty(
                            instance_type='m4.xlarge',
                            weighted_capacity=1
                        ),
                        _sfn_tasks.EmrCreateCluster.InstanceTypeConfigProperty(
                            instance_type='m5d.xlarge',
                            weighted_capacity=1
                        ),
                    ],
                    launch_specifications=_sfn_tasks.EmrCreateCluster.InstanceFleetProvisioningSpecificationsProperty(
                        spot_specification=_sfn_tasks.EmrCreateCluster.SpotProvisioningSpecificationProperty(
                            timeout_action=_sfn_tasks.EmrCreateCluster.SpotTimeoutAction.SWITCH_TO_ON_DEMAND,
                            timeout_duration_minutes=5
                        )
                    ),
                    target_on_demand_capacity=0,
                    target_spot_capacity=1
                ),
                # Core fleet: spot capacity sized by the t-shirt size; larger
                # types carry proportionally larger weighted capacity.
                _sfn_tasks.EmrCreateCluster.InstanceFleetConfigProperty(
                    instance_fleet_type=_sfn_tasks.EmrCreateCluster.InstanceRoleType.CORE,
                    instance_type_configs=[
                        _sfn_tasks.EmrCreateCluster.InstanceTypeConfigProperty(
                            instance_type='m5.xlarge',
                            weighted_capacity=1
                        ),
                        _sfn_tasks.EmrCreateCluster.InstanceTypeConfigProperty(
                            instance_type='m5.2xlarge',
                            weighted_capacity=2
                        ),
                        _sfn_tasks.EmrCreateCluster.InstanceTypeConfigProperty(
                            instance_type='m5a.xlarge',
                            weighted_capacity=1
                        ),
                        _sfn_tasks.EmrCreateCluster.InstanceTypeConfigProperty(
                            instance_type='m5a.2xlarge',
                            weighted_capacity=2
                        ),
                        _sfn_tasks.EmrCreateCluster.InstanceTypeConfigProperty(
                            instance_type='m4.xlarge',
                            weighted_capacity=1
                        )
                    ],
                    launch_specifications=_sfn_tasks.EmrCreateCluster.InstanceFleetProvisioningSpecificationsProperty(
                        spot_specification=_sfn_tasks.EmrCreateCluster.SpotProvisioningSpecificationProperty(
                            timeout_action=_sfn_tasks.EmrCreateCluster.SpotTimeoutAction.SWITCH_TO_ON_DEMAND,
                            timeout_duration_minutes=5
                        )
                    ),
                    target_on_demand_capacity=0,
                    target_spot_capacity=DataGenConfig.BATCH_CLUSTER_SIZE[tshirt_size]
                )
            ]
        )
    ).add_catch(handler=terminate_cluster_error, result_path="$.error")

    # Ask the config Lambda for the step parameters of this run.
    configure_datagen = _sfn_tasks.LambdaInvoke(
        self, "BatchConfigureDatagenTask",
        lambda_function=configure_datagen_function,
        payload=_sfn.TaskInput.from_text('{'
                                         '"Param": "batch_iterator",'
                                         '"Module": "batch",'
                                         '"SinkBucket": "'+sink_bucket.s3_url_for_object()+'",'
                                         '"Parallelism": "'+str(int(DataGenConfig.BATCH_DATA_SIZE[tshirt_size])*2)+'",'
                                         '"DataSize": "'+DataGenConfig.BATCH_DATA_SIZE[tshirt_size]+'",'
                                         '"TmpBucket": "fake-bucket"'
                                         '}'),
        result_path='$.Config'
    ).add_catch(handler=terminate_cluster_error, result_path="$.error")

    # Raw ASL state: addStep.sync with Args taken from the config payload.
    # "Next"/"Catch" reference sibling states by their construct ids.
    add_datagen_step = _sfn.CustomState(
        self, 'BatchAddDataGenStep',
        state_json={
            "Type": "Task",
            "Resource": "arn:aws:states:::elasticmapreduce:addStep.sync",
            "Parameters": {
                "ClusterId.$": "$.Emr.Cluster.Id",
                "Step": {
                    "Name": "DatagenStep",
                    "ActionOnFailure": "CONTINUE",
                    "HadoopJarStep": {
                        "Jar": "command-runner.jar",
                        "Args.$": "$.Config.Payload.StepParam"
                    }
                }
            },
            "ResultPath": "$.Step",
            "Next": "BatchUpdateIterator",
            "Catch": [
                {
                    "ErrorEquals": ["States.ALL"],
                    "Next": "BatchDeleteClusterError",
                    "ResultPath": "$.error"
                }
            ]
        }
    )

    # Increment the batch iterator so the next run generates the next slice.
    update_iterator = _sfn_tasks.DynamoUpdateItem(
        self, 'BatchUpdateIterator',
        table=config_table,
        key={
            'param': _sfn_tasks.DynamoAttributeValue.from_string('batch_iterator')
        },
        update_expression='SET iterator = if_not_exists(iterator, :start) + :inc',
        expression_attribute_values={
            ":inc": _sfn_tasks.DynamoAttributeValue.from_number(1),
            ":start": _sfn_tasks.DynamoAttributeValue.from_number(0)
        },
        result_path=_sfn.JsonPath.DISCARD
    )

    definition = configure_datagen \
        .next(create_cluster) \
        .next(add_datagen_step) \
        .next(update_iterator) \
        .next(terminate_cluster)

    datagen_stepfunctions = _sfn.StateMachine(
        self, "BatchDataGenStepFunctions",
        definition=definition,
        timeout=core.Duration.minutes(30)
    )

    # Permissions needed by the CustomState (addStep.sync polling).
    datagen_stepfunctions.add_to_role_policy(
        _iam.PolicyStatement(
            actions=[
                'elasticmapreduce:AddJobFlowSteps',
                'elasticmapreduce:DescribeStep'
            ],
            resources=['*']
        )
    )
    datagen_stepfunctions.add_to_role_policy(
        _iam.PolicyStatement(
            actions=[
                "iam:CreateServiceLinkedRole",
                "iam:PutRolePolicy"
            ],
            resources=["arn:aws:iam::*:role/aws-service-role/elasticmapreduce.amazonaws.com*/AWSServiceRoleForEMRCleanup*"],
            conditions={
                "StringLike": {
                    "iam:AWSServiceName": [
                        "elasticmapreduce.amazonaws.com",
                        "elasticmapreduce.amazonaws.com.cn"
                    ]
                }
            }
        )
    )

    # Run the workflow every 30 minutes.
    step_trigger = _events.Rule(
        self, 'BatchSteptrigger',
        schedule=_events.Schedule.cron(minute='0/30',
                                       hour='*',
                                       month='*',
                                       week_day='*',
                                       year='*')
    )

    step_trigger.add_target(_events_targets.SfnStateMachine(machine=datagen_stepfunctions))

    # Custom resource that starts one execution right after deployment.
    with open('common/common_cdk/lambda/stepfunctions_trigger.py', 'r') as f:
        lambda_source = f.read()

    stepfunctions_trigger_lambda = _lambda.SingletonFunction(
        self, 'BatchStepFunctionsTriggerLambda',
        uuid="9597f6f2-f840-11ea-adc1-0242ac120002",
        runtime=_lambda.Runtime.PYTHON_3_7,
        code=_lambda.Code.inline(lambda_source),
        handler='index.handler',
        function_name='stepfunctions-batch-datagen-trigger'
    )

    stepfunctions_trigger_lambda.role.add_to_policy(
        _iam.PolicyStatement(
            actions=["states:StartExecution"],
            resources=['*']
        )
    )

    trigger_step_lambda_provider = _custom_resources.Provider(
        self, 'StepFunctionsTriggerLambdaProvider',
        on_event_handler=stepfunctions_trigger_lambda
    )

    core.CustomResource(
        self, 'StepFunctionsTrigger',
        service_token=trigger_step_lambda_provider.service_token,
        properties={
            "stepArn": datagen_stepfunctions.state_machine_arn
        }
    )

    # Custom resource that terminates clusters/executions on stack deletion.
    with open('common/common_cdk/lambda/stepfunctions_terminate_emr.py', 'r') as f:
        lambda_source = f.read()

    sfn_terminate = _lambda.SingletonFunction(
        self, 'StepFuncTerminateBatch',
        uuid='58a9a422-ff07-11ea-adc1-0242ac120002',
        runtime=_lambda.Runtime.PYTHON_3_7,
        code=_lambda.Code.inline(lambda_source),
        handler='index.handler',
        timeout=core.Duration.minutes(5)
    )

    sfn_terminate.role.add_to_policy(
        _iam.PolicyStatement(
            actions=[
                'elasticmapreduce:ListClusters',
                'elasticmapreduce:TerminateJobFlows',
                'states:ListStateMachines',
                'states:ListExecutions',
                'states:StopExecution'
            ],
            resources=['*']
        )
    )

    sfn_terminate_provider = _custom_resources.Provider(
        self, 'StepFuncTerminateBatchLambdaProvider',
        on_event_handler=sfn_terminate
    )

    core.CustomResource(
        self, 'StepFuncTerminateBatchCustomResource',
        service_token=sfn_terminate_provider.service_token,
        properties={
            "state_machine": 'BatchDatagen'
        })
def test_add_step_with_argument_overrides(): default_fragment_json = { 'Type': 'Parallel', 'End': True, 'Branches': [{ 'StartAt': 'test-fragment: test-step - Override Args', 'States': { 'test-fragment: test-step - Override Args': { 'Next': 'test-fragment: test-step', 'Catch': [{ 'ErrorEquals': ['States.ALL'], 'ResultPath': '$.Error', 'Next': 'test-fail' }], 'Parameters': { 'ExecutionInput.$': '$$.Execution.Input', 'StepName': 'test-step', 'Args': ['Arg1', 'Arg2'] }, 'Type': 'Task', 'Resource': { 'Fn::GetAtt': ['OverrideStepArgsE9376C9F', 'Arn'] }, 'ResultPath': '$.test-fragmentResultArgs' }, 'test-fragment: test-step': { 'End': True, 'Catch': [{ 'ErrorEquals': ['States.ALL'], 'ResultPath': '$.Error', 'Next': 'test-fail' }], 'Parameters': { 'ClusterId': 'test-cluster-id', 'Step': { 'Name': 'test-step', 'ActionOnFailure': 'CONTINUE', 'HadoopJarStep': { 'Jar': 'Jar', 'MainClass': 'Main', 'Args.$': '$.test-fragmentResultArgs', 'Properties': [] } } }, 'Type': 'Task', 'Resource': { 'Fn::Join': [ '', [ 'arn:', { 'Ref': 'AWS::Partition' }, ':states:::elasticmapreduce:addStep.sync' ] ] } }, 'test-fail': { 'Type': 'Fail' } } }] } stack = core.Stack(core.App(), 'test-stack') fragment = emr_chains.AddStepWithArgumentOverrides( stack, 'test-fragment', emr_step=emr_code.EMRStep('test-step', 'Jar', 'Main', ['Arg1', 'Arg2']), cluster_id='test-cluster-id', fail_chain=sfn.Fail(stack, 'test-fail')) print_and_assert(default_fragment_json, fragment)
def __init__(self, scope: Construct, id: str, log_level: CfnParameter):
    """Forecast deployment construct.

    Wires the Lambdas created by ``create_functions`` into a Step Functions
    workflow: create dataset group -> import data -> (per dataset group)
    create predictor -> create forecast -> export forecast -> notify, with
    SNS-backed failure notifications on every stage.

    :param scope: parent construct.
    :param id: construct id.
    :param log_level: CloudFormation parameter with the desired log level.
    """
    super().__init__(scope, id)

    self._bundling = {}  # asset-bundling cache, presumably used by create_functions — TODO confirm
    self.log_level = log_level.value_as_string
    # Repository root, four levels up from this file.
    self.source_path = Path(__file__).parent.parent.parent.parent
    self.topic = None
    self.subscription = None
    # Lambdas keyed by short name ("SNS", "CreatePredictor", ...);
    # annotation fixed: was the malformed `Dict[Function]`.
    self.functions: Dict[str, Function] = {}
    self.policies = Policies(self)
    self.create_functions()

    # step function steps
    check_error = sfn.Choice(self, "Check-Error")

    # Generic failure notification -> terminal Fail state.
    notify_failed = tasks.LambdaInvoke(
        self,
        "Notify-Failed",
        lambda_function=self.functions["SNS"],
        payload_response_only=True,
        retry_on_service_exceptions=True,
        result_path=None,
    )
    notify_failed.next(sfn.Fail(self, "FailureState"))

    create_dataset_group = tasks.LambdaInvoke(
        self,
        "Create-DatasetGroup",
        lambda_function=self.functions["CreateDatasetGroup"],
        result_path="$.DatasetGroupNames",
        payload_response_only=True,
        retry_on_service_exceptions=True,
    )
    # "ResourcePending" is polled via retry; hard failures go to notify_failed.
    create_dataset_group.add_retry(backoff_rate=1.05,
                                   interval=Duration.seconds(5),
                                   errors=["ResourcePending"])
    create_dataset_group.add_catch(notify_failed,
                                   errors=["ResourceFailed"],
                                   result_path="$.serviceError")
    create_dataset_group.add_catch(notify_failed,
                                   errors=["States.ALL"],
                                   result_path="$.statesError")

    import_data = tasks.LambdaInvoke(
        self,
        "Import-Data",
        lambda_function=self.functions["CreateDatasetImportJob"],
        result_path="$.DatasetImportJobArn",
        payload_response_only=True,
        retry_on_service_exceptions=True,
    )
    import_data.add_retry(
        backoff_rate=1.05,
        interval=Duration.seconds(5),
        max_attempts=100,
        errors=["ResourcePending"],
    )
    import_data.add_catch(notify_failed,
                          errors=["ResourceFailed"],
                          result_path="$.serviceError")
    import_data.add_catch(notify_failed,
                          errors=["States.ALL"],
                          result_path="$.statesError")

    update_not_required = sfn.Succeed(self, "Update-Not-Required")

    notify_success = tasks.LambdaInvoke(
        self,
        "Notify-Success",
        lambda_function=self.functions["SNS"],
        payload_response_only=True,
        retry_on_service_exceptions=True,
        result_path=None,
    )

    # Failure notification used inside the per-dataset-group Map branch.
    notify_prediction_failed = tasks.LambdaInvoke(
        self,
        "Notify-Prediction-Failed",
        lambda_function=self.functions["SNS"],
        payload_response_only=True,
        retry_on_service_exceptions=True,
        result_path=None,
    )
    notify_prediction_failed.next(sfn.Fail(self, "Prediction-Failed"))

    create_predictor = tasks.LambdaInvoke(
        self,
        "Create-Predictor",
        lambda_function=self.functions["CreatePredictor"],
        result_path="$.PredictorArn",
        payload_response_only=True,
        retry_on_service_exceptions=True,
    )
    create_predictor.add_retry(
        backoff_rate=1.05,
        interval=Duration.seconds(5),
        max_attempts=100,
        errors=["ResourcePending", "DatasetsImporting"],
    )
    create_predictor.add_catch(
        notify_prediction_failed,
        errors=["ResourceFailed"],
        result_path="$.serviceError",
    )
    create_predictor.add_catch(notify_prediction_failed,
                               errors=["States.ALL"],
                               result_path="$.statesError")
    # A stale update is not an error — just end that branch successfully.
    create_predictor.add_catch(update_not_required,
                               errors=["NotMostRecentUpdate"])

    create_forecast = tasks.LambdaInvoke(
        self,
        "Create-Forecast",
        lambda_function=self.functions["CreateForecast"],
        result_path="$.ForecastArn",
        payload_response_only=True,
        retry_on_service_exceptions=True,
    )
    create_forecast.add_retry(
        backoff_rate=1.05,
        interval=Duration.seconds(5),
        max_attempts=100,
        errors=["ResourcePending"],
    )
    create_forecast.add_catch(
        notify_prediction_failed,
        errors=["ResourceFailed"],
        result_path="$.serviceError",
    )
    create_forecast.add_catch(notify_prediction_failed,
                              errors=["States.ALL"],
                              result_path="$.statesError")

    export_forecast = tasks.LambdaInvoke(
        self,
        "Export-Forecast",
        lambda_function=self.functions["PrepareForecastExport"],
        result_path="$.ExportTableName",
        payload_response_only=True,
        retry_on_service_exceptions=True,
    )
    export_forecast.add_catch(
        notify_prediction_failed,
        errors=["ResourceFailed"],
        result_path="$.serviceError",
    )
    export_forecast.add_catch(notify_prediction_failed,
                              errors=["States.ALL"],
                              result_path="$.statesError")

    # Fan out over the dataset group names produced earlier; each item gets
    # the shared bucket/config plus its own group name.
    create_forecasts = sfn.Map(
        self,
        "Create-Forecasts",
        items_path="$.DatasetGroupNames",
        parameters={
            "bucket.$": "$.bucket",
            "dataset_file.$": "$.dataset_file",
            "dataset_group_name.$": "$$.Map.Item.Value",
            "config.$": "$.config",
        },
    )

    # step function definition
    definition = (check_error.when(
        sfn.Condition.is_present("$.serviceError"),
        notify_failed).otherwise(create_dataset_group).afterwards().next(
            import_data).next(
                create_forecasts.iterator(
                    create_predictor.next(create_forecast).next(
                        export_forecast).next(notify_success))))

    self.state_machine = sfn.StateMachine(self,
                                          "DeployStateMachine",
                                          definition=definition)
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    """S3 object-ACL remediation stack.

    CloudTrail data events on a monitored bucket trigger a Step Functions
    workflow that checks an object's ACL (Lambda), remediates it if public
    (Lambda), and notifies InfoSecOps via SNS when remediation fails.
    """
    super().__init__(scope, id, **kwargs)

    # Monitored bucket; tag is read by the remediation tooling.
    pvt_bkt = _s3.Bucket(self, "s3bucket")
    core.Tag.add(pvt_bkt, key="isMonitoredBucket", value="True")

    # Lets create a cloudtrail to track s3 data events
    s3_data_event_trail = _cloudtrail.Trail(
        self, "s3DataEventTrailId",
        is_multi_region_trail=False,
        include_global_service_events=False,
        enable_file_validation=True)

    # Lets capture S3 Data Events only for our bucket - TO REDUCE COST
    s3_data_event_trail.add_s3_event_selector(
        prefixes=[f"{pvt_bkt.bucket_arn}/"],
        include_management_events=True,
        read_write_type=_cloudtrail.ReadWriteType.ALL)

    # Lambda that reports whether an object's ACL is already private.
    with open("lambda_src/is_object_private.py", encoding="utf8") as fp:
        is_object_private_fn_handler_code = fp.read()

    is_object_private_fn = _lambda.Function(
        self,
        id='isObjPrivateFn',
        function_name="is_object_private_fn",
        runtime=_lambda.Runtime.PYTHON_3_7,
        code=_lambda.InlineCode(is_object_private_fn_handler_code),
        handler='index.lambda_handler',
        timeout=core.Duration.seconds(3))

    is_object_private_fn_perms = _iam.PolicyStatement(
        effect=_iam.Effect.ALLOW,
        resources=[
            "arn:aws:s3:::*",
        ],
        actions=["s3:GetObjectAcl"])
    # BUG FIX: the Sid belongs on the policy statement, not on the Lambda
    # Function object (original set `is_object_private_fn.sid`, which left the
    # statement without a Sid).  Mirrors remediate_object_acl_fn_perms below.
    is_object_private_fn_perms.sid = "CheckObjectAcl"
    is_object_private_fn.add_to_role_policy(is_object_private_fn_perms)

    # Lambda that rewrites a public object ACL back to private.
    with open("lambda_src/make_object_private.py", encoding="utf8") as fp:
        make_object_private_fn_handler_code = fp.read()

    remediate_object_acl_fn = _lambda.Function(
        self,
        id='remediateObjAclFn',
        function_name="remediate_object_acl_fn",
        runtime=_lambda.Runtime.PYTHON_3_7,
        code=_lambda.InlineCode(make_object_private_fn_handler_code),
        handler='index.lambda_handler',
        timeout=core.Duration.seconds(10))

    remediate_object_acl_fn_perms = _iam.PolicyStatement(
        effect=_iam.Effect.ALLOW,
        resources=[
            "arn:aws:s3:::*",
        ],
        actions=["s3:PutObjectAcl"])
    remediate_object_acl_fn_perms.sid = "PutObjectAcl"
    remediate_object_acl_fn.add_to_role_policy(
        remediate_object_acl_fn_perms)

    info_sec_ops_topic = _sns.Topic(self,
                                    "infoSecOpsTopicId",
                                    display_name="InfoSecTopic",
                                    topic_name="InfoSecOpsTopic")

    # Subscribe InfoSecOps Email to topic
    info_sec_ops_topic.add_subscription(
        _subs.EmailSubscription(global_args.INFO_SEC_OPS_EMAIL))

    # State Machine for notifying failed ACLs
    # Ref: https://docs.aws.amazon.com/cdk/api/latest/docs/aws-stepfunctions-readme.html
    ###############################################################################
    ################# STEP FUNCTIONS EXPERIMENTAL CODE - UNSTABLE #################
    ###############################################################################
    is_object_private_task = _sfn.Task(
        self,
        "isObjectPrivate?",
        task=_tasks.InvokeFunction(is_object_private_fn),
        result_path="$",
        output_path="$")

    remediate_object_acl_task = _sfn.Task(
        self,
        "RemediateObjectAcl",
        task=_tasks.InvokeFunction(remediate_object_acl_fn),
        result_path="$",
        output_path="$")

    notify_secops_task = _sfn.Task(
        self, "Notify InfoSecOps",
        task=_tasks.PublishToTopic(
            info_sec_ops_topic,
            integration_pattern=_sfn.ServiceIntegrationPattern.FIRE_AND_FORGET,
            message=_sfn.TaskInput.from_data_at("$.sns_message"),
            subject="Object Acl Remediation"))

    acl_remediation_failed_task = _sfn.Fail(self,
                                            "Acl Remediation Failed",
                                            cause="Acl Remediation Failed",
                                            error="Check Logs")

    acl_compliant_task = _sfn.Succeed(self,
                                      "Object Acl Compliant",
                                      comment="Object Acl is Compliant")

    # Private objects succeed immediately; public objects are remediated and
    # re-checked, escalating to SNS + Fail when remediation does not stick.
    remediate_object_acl_sfn_definition = is_object_private_task\
        .next(_sfn.Choice(self, "Is Object Private?")
              .when(_sfn.Condition.boolean_equals("$.is_private", True),
                    acl_compliant_task)
              .when(_sfn.Condition.boolean_equals("$.is_private", False),
                    remediate_object_acl_task
                    .next(_sfn.Choice(self, "Object Remediation Complete?")
                          .when(_sfn.Condition.boolean_equals("$.status", True),
                                acl_compliant_task)
                          .when(_sfn.Condition.boolean_equals("$.status", False),
                                notify_secops_task.next(acl_remediation_failed_task))
                          .otherwise(acl_remediation_failed_task)
                          )
                    )
              .otherwise(acl_remediation_failed_task)
              )

    remediate_object_acl_statemachine = _sfn.StateMachine(
        self,
        "stateMachineId",
        definition=remediate_object_acl_sfn_definition,
        timeout=core.Duration.minutes(3))

    # Cloudwatch Event Triggers
    put_object_acl_event_targets = []
    put_object_acl_event_targets.append(
        _targets.SfnStateMachine(
            machine=remediate_object_acl_statemachine))

    put_object_acl_event_pattern = _events.EventPattern(
        source=["aws.s3"],
        detail_type=["AWS API Call via CloudTrail"],
        detail={
            "eventSource": ["s3.amazonaws.com"],
            "eventName": ["PutObjectAcl", "PutObject"],
            "requestParameters": {
                "bucketName": [f"{pvt_bkt.bucket_name}"]
            }
        })

    put_object_acl_event_pattern_rule = _events.Rule(
        self,
        "putObjectAclEventId",
        event_pattern=put_object_acl_event_pattern,
        rule_name=f"put_s3_policy_event_{global_args.OWNER}",
        enabled=True,
        description="Trigger an event for S3 PutObjectAcl or PutObject",
        targets=put_object_acl_event_targets)

    ###########################################
    ################# OUTPUTS #################
    ###########################################
    # NOTE: the misspelled logical id "SecuirtyAutomationFrom" is kept —
    # renaming it would replace the stack output.
    output0 = core.CfnOutput(
        self,
        "SecuirtyAutomationFrom",
        value=f"{global_args.SOURCE_INFO}",
        description="To know more about this automation stack, check out our github page."
    )

    output1 = core.CfnOutput(
        self,
        "MonitoredS3Bucket",
        value=(f"https://console.aws.amazon.com/s3/buckets/"
               f"{pvt_bkt.bucket_name}"),
        description=f"S3 Bucket for testing purposes")

    output2 = core.CfnOutput(
        self,
        "Helpercommands",
        value=(f"aws s3api get-object-acl --bucket ${pvt_bkt.bucket_name} --key OBJECT-KEY-NAME"),
        description=f"Commands to set object to public, Update OBJECT-KEY-NAME to your needs")
def create_stepfunction(self) -> Resource:
    """Create the state machine that builds and deploys the edge component.

    The pipeline builds the inference docker image, waits for the build,
    creates the Greengrass component, deploys it, and polls the deployment
    status in a wait/check loop until it reaches COMPLETED (success) or a
    failure status (SNS notification, then Fail state).

    Returns:
        Resource: the created Step Functions state machine.
    """
    name = f"{self.stack_name}_{self.component_id}_edgedeploy_pipeline"
    role_name = self.get_role_name("edgedeploy_pipeline")
    # Execution role assumed by the Step Functions service.
    sf_role = aws_iam.Role(
        self,
        id=role_name,
        assumed_by=aws_iam.ServicePrincipal("states.amazonaws.com"),
        role_name=role_name,
        path="/service-role/",
        managed_policies=[
            aws_iam.ManagedPolicy.from_aws_managed_policy_name(
                "service-role/AWSLambdaBasicExecutionRole")
        ])
    # Allow the state machine to deliver execution logs to CloudWatch Logs.
    sf_role.attach_inline_policy(
        aws_iam.Policy(
            self,
            "AllowCloudWatchLogsForSF",
            document=aws_iam.PolicyDocument(statements=[
                aws_iam.PolicyStatement(actions=[
                    "logs:CreateLogDelivery", "logs:GetLogDelivery",
                    "logs:UpdateLogDelivery", "logs:DeleteLogDelivery",
                    "logs:ListLogDeliveries", "logs:PutResourcePolicy",
                    "logs:DescribeResourcePolicies",
                    "logs:DescribeLogGroups"
                ],
                                        resources=["*"])
            ])))
    # Allow X-Ray tracing from the state machine.
    sf_role.attach_inline_policy(
        aws_iam.Policy(
            self,
            "AllowXRayForSF",
            document=aws_iam.PolicyDocument(statements=[
                aws_iam.PolicyStatement(actions=[
                    "xray:PutTraceSegments", "xray:PutTelemetryRecords",
                    "xray:GetSamplingRules", "xray:GetSamplingTargets"
                ],
                                        resources=["*"])
            ])))
    # Allow invoking the five pipeline Lambdas (and any qualified versions).
    sf_role.attach_inline_policy(
        aws_iam.Policy(
            self,
            "AllowInvokeLambda",
            document=aws_iam.PolicyDocument(statements=[
                aws_iam.PolicyStatement(
                    actions=["lambda:InvokeFunction"],
                    resources=[
                        self._lambda_build_image.function_arn,
                        self._lambda_build_image.function_arn + ":*",
                        self._lambda_check_image_status.function_arn,
                        self._lambda_check_image_status.function_arn + ":*",
                        self._lambda_create_component.function_arn,
                        self._lambda_create_component.function_arn + ":*",
                        self._lambda_deploy_component.function_arn,
                        self._lambda_deploy_component.function_arn + ":*",
                        self._lambda_check_deploy_status.function_arn,
                        self._lambda_check_deploy_status.function_arn + ":*"
                    ])
            ])))

    # Build the inference docker container image.
    task_build_image = aws_sf_tasks.LambdaInvoke(
        self,
        "BuildInferenceImage",
        lambda_function=self._lambda_build_image,
        output_path="$.Payload")
    # Check the result of the docker image build.
    task_check_build_image_status = aws_sf_tasks.LambdaInvoke(
        self,
        "CheckDockerImageBuildStatus",
        lambda_function=self._lambda_check_image_status,
        output_path="$.Payload")
    # Wait for the docker image build to finish.
    wait_image_build = aws_sf.Wait(self,
                                   "WaitImageBuildFinish",
                                   time=aws_sf.WaitTime.duration(
                                       Duration.seconds(30)))
    # Create the Greengrass component.
    task_create_greengrass_component = aws_sf_tasks.LambdaInvoke(
        self,
        "CreateComponent",
        lambda_function=self._lambda_create_component,
        output_path="$.Payload")
    # Deploy the component to Greengrass.
    task_deploy_component = aws_sf_tasks.LambdaInvoke(
        self,
        "DeployComponent",
        lambda_function=self._lambda_deploy_component,
        output_path="$.Payload")
    # Wait for the Greengrass deployment to finish.
    wait_component_deploy = aws_sf.Wait(self,
                                        "WaitDeploymentFinish",
                                        time=aws_sf.WaitTime.duration(
                                            Duration.seconds(30)))
    # Check the result of the Greengrass deployment.
    task_check_deployment_status = aws_sf_tasks.LambdaInvoke(
        self,
        "CheckDeploymentStatus",
        lambda_function=self._lambda_check_deploy_status,
        output_path="$.Payload")
    # Terminal failure state.
    pipeline_failed = aws_sf.Fail(self,
                                  "PipelineFailed",
                                  error="DeployPipelineFailed",
                                  cause="Something went wrong")
    # Terminal success state (construct id keeps original spelling).
    pipeline_success = aws_sf.Succeed(self, "PipelineSuccessed")
    # Branch: did the component already exist?
    choice_component_exists_result = aws_sf.Choice(self,
                                                   "JudgeComponentExists")
    # Branch: did the docker image build succeed?
    choice_image_build_result = aws_sf.Choice(self, "JudgeImageBuildStatus")
    # Branch: what is the deployment status?
    choice_deployment_result = aws_sf.Choice(self, "JudgeDeploymentStatus")
    # Notify success via SNS, then succeed.
    publish_success_message = aws_sf_tasks.SnsPublish(
        self,
        "Publish Success message",
        topic=aws_sns.Topic(self, "SendDeploySuccess"),
        message=aws_sf.TaskInput.from_json_path_at("$.message")).next(
            pipeline_success)
    # Notify failure via SNS, then fail.
    publish_failed_message = aws_sf_tasks.SnsPublish(
        self,
        "Publish Failed message",
        topic=aws_sns.Topic(self, "SendPipelineFailed"),
        message=aws_sf.TaskInput.from_json_path_at("$.message")).next(
            pipeline_failed)

    # Full pipeline graph. NOTE(review): the status literals
    # ("component_exists", "image_exists", "image_faild") must match what
    # the Lambdas emit — "image_faild" looks like a typo but is presumably
    # mirrored in the Lambda code; confirm before renaming.
    definition = \
        task_build_image.next(
            choice_component_exists_result
            .when(
                aws_sf.Condition.string_equals("$.status",
                                               "component_exists"),
                task_deploy_component)
            .otherwise(
                wait_image_build.next(
                    task_check_build_image_status).next(
                        choice_image_build_result.when(
                            aws_sf.Condition.string_equals("$.status",
                                                           "image_exists"),
                            task_create_greengrass_component
                            .next(task_deploy_component)
                            .next(wait_component_deploy)
                            .next(task_check_deployment_status)
                            .next(
                                choice_deployment_result
                                .when(aws_sf.Condition.string_equals(
                                    "$.status", "RUNNING"),
                                    wait_component_deploy)
                                .when(aws_sf.Condition.string_equals(
                                    "$.status", "COMPLETED"),
                                    publish_success_message)
                                .otherwise(
                                    publish_failed_message).afterwards()))
                        .when(
                            aws_sf.Condition.string_equals("$.status",
                                                           "image_faild"),
                            publish_failed_message)
                        .otherwise(
                            wait_image_build).afterwards())
            )
        )
    state_machine = aws_sf.StateMachine(
        self,
        id=name,
        state_machine_name=name,
        definition=definition,
        state_machine_type=aws_sf.StateMachineType.STANDARD,
        role=sf_role)
    # FIX: the state machine was created but never returned, contradicting
    # the "-> Resource" annotation and the docstring.
    return state_machine
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    """Stack wiring a submit/poll/finalize Step Functions workflow.

    Three Lambdas (submit, status poll, final status) are chained into a
    state machine that loops on a Wait state until the polled status is
    SUCCEEDED or FAILED.
    """
    super().__init__(scope, id, **kwargs)

    # All three handlers live in the same Lambda entry directory.
    fn_submit = aws_lambda_python.PythonFunction(
        self,
        "submit-status",
        entry="./lambdas/example",
        handler="submit_status",
    )
    fn_poll = aws_lambda_python.PythonFunction(
        self, "get-status", entry="./lambdas/example", handler="get_status")
    fn_finalize = aws_lambda_python.PythonFunction(
        self, "final-status", entry="./lambdas/example",
        handler="final_status")

    # Task states backed by the Lambdas above.
    state_submit = tasks.LambdaInvoke(
        self,
        "Submit Job",
        lambda_function=fn_submit,
        payload_response_only=True,
    )
    state_wait = sfn.Wait(self,
                          "Wait X Seconds",
                          time=sfn.WaitTime.seconds_path("$.seconds"))
    state_poll = tasks.LambdaInvoke(
        self,
        "Get Job Status",
        lambda_function=fn_poll,
        payload_response_only=True,
    )
    state_failed = sfn.Fail(
        self,
        "Job Failed",
        cause="AWS Batch Job Failed",
        error="DescribeJob returned FAILED",
    )
    state_final = tasks.LambdaInvoke(
        self,
        "Get Final Job Status",
        lambda_function=fn_finalize,
        payload_response_only=True,
    )

    # Branch on "$.status"; anything other than FAILED/SUCCEEDED loops
    # back to the Wait state.
    completion_check = sfn.Choice(self, "Job Complete?")
    completion_check.when(
        sfn.Condition.string_equals("$.status", "FAILED"), state_failed)
    completion_check.when(
        sfn.Condition.string_equals("$.status", "SUCCEEDED"), state_final)
    completion_check.otherwise(state_wait)

    definition = state_submit.next(state_wait).next(state_poll).next(
        completion_check)

    sfn.StateMachine(
        self,
        "StateMachine",
        definition=definition,
        timeout=core.Duration.minutes(5),
    )
def __init__(self, scope: cdk.Construct, construct_id: str, **kwargs) -> None:
    """Stack with a submit-then-poll Step Functions workflow.

    A submit Lambda kicks off the job; a status Lambda is polled via a
    Wait/Choice loop until it reports SUCCEEDED or FAILED.
    """
    super().__init__(scope, construct_id, **kwargs)

    # Both handlers are packaged from the same local directory.
    lambda_dir = os.path.join(os.getcwd(), 'lambdas')
    job_submitter = PythonFunction(self, 'Submit',
                                   handler='handler',
                                   index='submit.py',
                                   entry=lambda_dir,
                                   runtime=lambda_.Runtime.PYTHON_3_8)
    status_checker = PythonFunction(self, 'Status',
                                    handler='handler',
                                    index='status.py',
                                    entry=lambda_dir,
                                    runtime=lambda_.Runtime.PYTHON_3_8)

    # Unwrap the Lambda response: only the `Payload` attribute flows on.
    state_submit = tasks.LambdaInvoke(
        self, "Submit Job",
        lambda_function=job_submitter,
        output_path="$.Payload")
    state_wait = sfn.Wait(self,
                          "Wait X Seconds",
                          time=sfn.WaitTime.seconds_path("$.waitSeconds"))
    state_poll = tasks.LambdaInvoke(
        self, "Get Job Status",
        lambda_function=status_checker,
        output_path="$.Payload")
    state_failed = sfn.Fail(self,
                            "Job Failed",
                            cause="AWS Batch Job Failed",
                            error="DescribeJob returned FAILED")
    # Final status check reuses the same status Lambda.
    state_final = tasks.LambdaInvoke(
        self, "Get Final Job Status",
        lambda_function=status_checker,
        output_path="$.Payload")

    # Branch on "$.status"; unknown statuses loop back to the Wait state.
    completion_check = sfn.Choice(self, "Job Complete?")
    completion_check.when(
        sfn.Condition.string_equals("$.status", "FAILED"), state_failed)
    completion_check.when(
        sfn.Condition.string_equals("$.status", "SUCCEEDED"), state_final)
    completion_check.otherwise(state_wait)

    definition = state_submit.next(state_wait).next(state_poll).next(
        completion_check)

    sfn.StateMachine(self,
                     "StateMachine",
                     definition=definition,
                     timeout=cdk.Duration.minutes(5))
def __init__(self, app: core.App, id: str, **kwargs) -> None:
    """Stack with an activity-backed workflow including a Map state.

    Submit -> Map(two activity tasks, max 10 concurrent) -> Wait -> poll,
    looping until the polled status is SUCCEEDED or FAILED.
    """
    super().__init__(app, id, **kwargs)

    # Worker-polled activities backing each Task state.
    act_submit = sfn.Activity(self, "SubmitJob")
    act_check = sfn.Activity(self, "CheckJob")
    act_map_one = sfn.Activity(self, "MapJOb1")
    act_map_two = sfn.Activity(self, "MapJOb2")

    state_submit = sfn.Task(
        self,
        "Submit Job",
        task=sfn_tasks.InvokeActivity(act_submit),
        result_path="$.guid",
    )
    map_step_one = sfn.Task(
        self,
        "Task 1 in Mapping",
        task=sfn_tasks.InvokeActivity(act_map_one),
        result_path="$.guid",
    )
    map_step_two = sfn.Task(
        self,
        "Task 2 in Mapping",
        task=sfn_tasks.InvokeActivity(act_map_two),
        result_path="$.guid",
    )
    state_wait = sfn.Wait(
        self,
        "Wait X Seconds",
        time=sfn.WaitTime.seconds_path('$.wait_time'),
    )
    state_poll = sfn.Task(
        self,
        "Get Job Status",
        task=sfn_tasks.InvokeActivity(act_check),
        input_path="$.guid",
        result_path="$.status",
    )
    completion_check = sfn.Choice(self, "Job Complete?")
    state_failed = sfn.Fail(self,
                            "Job Failed",
                            cause="AWS Batch Job Failed",
                            error="DescribeJob returned FAILED")
    # Final check reuses the same activity but replaces the whole payload.
    state_final = sfn.Task(
        self,
        "Get Final Job Status",
        task=sfn_tasks.InvokeActivity(act_check),
        input_path="$.guid",
    )

    # Two-step iterator executed per element by the Map state.
    state_map = sfn.Map(self, "Process_map",
                        max_concurrency=10).iterator(
                            map_step_one.next(map_step_two))

    completion_check.when(
        sfn.Condition.string_equals("$.status", "FAILED"), state_failed)
    completion_check.when(
        sfn.Condition.string_equals("$.status", "SUCCEEDED"), state_final)
    completion_check.otherwise(state_wait)

    definition = (state_submit
                  .next(state_map)
                  .next(state_wait)
                  .next(state_poll)
                  .next(completion_check))

    sfn.StateMachine(
        self,
        "StateMachine",
        definition=definition,
        timeout=core.Duration.seconds(30),
    )
def __init__(self, scope: cdk.Construct, construct_id: str, **kwargs) -> None:
    """LoRaWAN gateway connectivity watchdog stack.

    Wires an IoT Events detector model fed by a Lambda (run in a Step
    Functions wait/ingest/check loop) and an SNS topic for notifications.
    """
    super().__init__(scope, construct_id, **kwargs)
    ####################################################################################
    # IoT Events
    # IoT Events: Execution role — lets IoT Events publish to IoT Core and SNS.
    iot_events_execution_role = iam.Role(
        self,
        "IoTEventsExecutionRole",
        assumed_by=iam.ServicePrincipal("iotevents.amazonaws.com"))
    iot_events_execution_role.add_to_policy(
        iam.PolicyStatement(resources=["*"], actions=["iot:Publish"]))
    iot_events_execution_role.add_to_policy(
        iam.PolicyStatement(resources=["*"], actions=["SNS:Publish"]))
    # IoT Events: Input — the message attributes the detector model reads.
    inputDefinitionProperty = iotevents.CfnInput.InputDefinitionProperty(
        attributes=[{
            "jsonPath": "gatewayid"
        }, {
            "jsonPath": "last_uplink_received_timestamp_ms"
        }, {
            "jsonPath": "last_connection_status"
        }, {
            "jsonPath": "timestamp_iso8601"
        }])
    iot_events_input = iotevents.CfnInput(
        self,
        "LoRaWANGatewayConnectivityStatusInput",
        input_definition=inputDefinitionProperty,
        input_name="LoRaWANGatewayConnectivityStatusInput",
        input_description=
        "Input for connectivity status updates for LoRaWAN gateways")
    # IoT Events: Detector Model — states come from a helper module; one
    # detector instance per gateway (keyed on "gatewayid").
    detector_model_definition = iotevents.CfnDetectorModel.DetectorModelDefinitionProperty(
        initial_state_name=lorawan_gateway_monitoring_detectormodel.
        initial_state_name,
        states=lorawan_gateway_monitoring_detectormodel.get_states(self))
    iot_events_model = iotevents.CfnDetectorModel(
        self,
        "LoRaWANGatewayConnectivityModel",
        detector_model_definition=detector_model_definition,
        detector_model_name="LoRaWANGatewayConnectivityModel",
        detector_model_description=
        "Detector model for LoRaWAN gateway connectivity status",
        key="gatewayid",
        evaluation_method="BATCH",
        role_arn=iot_events_execution_role.role_arn)
    ####################################################################################
    # Lambda function GetWirelessGatewayStatisticsLambda
    # Execution role: may list gateways / read their statistics, and push
    # messages into the IoT Events input defined above.
    get_wireless_gateway_statistics_lambda_role = iam.Role(
        self,
        "GetWirelessGatewayStatisticsLambdaExecutionRole",
        assumed_by=iam.ServicePrincipal("lambda.amazonaws.com"))
    get_wireless_gateway_statistics_lambda_role.add_to_policy(
        iam.PolicyStatement(resources=[
            "arn:aws:iotwireless:" + self.region + ":" + self.account +
            ":WirelessGateway/*"
        ],
                            actions=[
                                "iotwireless:ListWirelessGateways",
                                "iotwireless:GetWirelessGatewayStatistics"
                            ]))
    get_wireless_gateway_statistics_lambda_role.add_to_policy(
        iam.PolicyStatement(resources=[
            "arn:aws:iotevents:" + self.region + ":" + self.account +
            ":input/LoRaWANGatewayConnectivityStatusInput"
        ],
                            actions=["iotevents:BatchPutMessage"]))
    # Lambda function GetWirelessGatewayStatisticsLambda: function configuration.
    get_wireless_gateway_statistics_lambda = lambda_.Function(
        self,
        "GetWirelessGatewayStatisticsLambda",
        code=lambda_.Code.asset(
            "src_get_wireless_gateway_statistics_lambda"),
        runtime=lambda_.Runtime.PYTHON_3_7,
        handler="lambda.handler",
        role=get_wireless_gateway_statistics_lambda_role,
        timeout=cdk.Duration.seconds(25))
    # NOTE(review): TEST_MODE is hard-coded to "true" here — confirm this
    # is intended for production deployments.
    get_wireless_gateway_statistics_lambda.add_environment(
        "TEST_MODE", "true")
    get_wireless_gateway_statistics_lambda.add_environment(
        "IOT_EVENTS_INPUT_NAME", "LoRaWANGatewayConnectivityStatusInput")
    ####################################################################################
    # SNS topic for gateway event notifications, with an e-mail
    # subscription taken from a CloudFormation parameter.
    sns_topic = sns.Topic(
        self,
        "LoRaWANGatewayNotificationTopic",
        display_name=
        "Topic to use for notifications about LoRaWAN gateway events like connect or disconnect",
        topic_name="LoRaWANGatewayNotificationTopic")
    email_address = cdk.CfnParameter(self, "emailforalarms")
    sns_topic.add_subscription(
        subscriptions.EmailSubscription(email_address.value_as_string))
    ####################################################################################
    # Step Function: ingest -> choice -> (wait -> ingest ...) | Fail loop.
    # State 'Fail'
    failure_state = sfn.Fail(self, "Fail")
    # State 'Wait': pause between polling rounds.
    wait_state = sfn.Wait(self,
                          "Sleep",
                          time=sfn.WaitTime.duration(
                              cdk.Duration.minutes(4)))
    # State 'Ingest gateway connectivity status into IoT Events input'
    lambda_invoke_state = tasks.LambdaInvoke(
        self,
        "Ingest gateway connectivity status into IoT Events input",
        result_path="$.wireless_gateway_stats",
        lambda_function=get_wireless_gateway_statistics_lambda
        # payload=task_input_payload
    )
    # Choice: loop back to the Wait state on HTTP 200, otherwise fail.
    # (construct id keeps the original "successfull" spelling)
    choice_lambda_state = sfn.Choice(
        self, "Did IoT events ingestion run successfull?")
    choice_lambda_state.when(
        sfn.Condition.number_equals(
            "$.wireless_gateway_stats.Payload.status", 200), wait_state)
    choice_lambda_state.otherwise(failure_state)
    # Define transitions
    wait_state.next(lambda_invoke_state)
    lambda_invoke_state.next(choice_lambda_state)
    # Create the state machine; execution must be started manually (see outputs).
    gateway_watchdog_state_machine = sfn.StateMachine(
        self,
        "LoRaWANGatewayWatchdogStatemachine",
        definition=lambda_invoke_state,
        state_machine_name="LoRaWANGatewayWatchdogStatemachine")
    ####################################################################################
    # CloudFormation Stack outputs: ARN plus copy/paste start/stop commands.
    cdk.CfnOutput(
        self,
        "StateMachineARN",
        value=gateway_watchdog_state_machine.state_machine_arn,
        description=
        "Please run 'aws stepfunctions start-execution --state-machine-arn <LorawanConnectivityWatchdogStack.StateMachineARN>' to start the monitoring of LoRaWAN gateway connectivity",
    )
    cdk.CfnOutput(
        self,
        "StateMachineStartCommand",
        value='aws stepfunctions start-execution --state-machine-arn ' +
        gateway_watchdog_state_machine.state_machine_arn,
        description=
        "Please run this command to start the monitoring of LoRaWAN gateway connectivity",
    )
    cdk.CfnOutput(
        self,
        "StateMachineStopommand",
        value='aws stepfunctions stop-execution --state-machine-arn ' +
        gateway_watchdog_state_machine.state_machine_arn,
        description=
        "Please run this command to stop the monitoring of LoRaWAN gateway connectivity",
    )
def test_nested_state_machine_chain():
    """NestedStateMachine fragment renders the expected Parallel branch.

    The fragment should contain the startExecution.sync task, the
    JSON-output parsing task, and a shared fail state, each with a
    States.ALL catch routing to the fail state.
    """
    expected = {
        'Type': 'Parallel',
        'End': True,
        'Branches': [{
            'StartAt': 'test-fragment: test-nested-state-machine',
            'States': {
                'test-fragment: test-nested-state-machine': {
                    'Next':
                    'test-fragment: test-nested-state-machine - Parse JSON Output',
                    'Catch': [{
                        'ErrorEquals': ['States.ALL'],
                        'ResultPath': '$.Error',
                        'Next': 'test-fail'
                    }],
                    'Parameters': {
                        'StateMachineArn': {
                            'Ref': 'teststatemachine7F4C511D'
                        },
                        'Input': {
                            'Key1': 'Value1'
                        }
                    },
                    'Type': 'Task',
                    'Resource': {
                        'Fn::Join': [
                            '',
                            [
                                'arn:', {
                                    'Ref': 'AWS::Partition'
                                }, ':states:::states:startExecution.sync'
                            ]
                        ]
                    }
                },
                'test-fragment: test-nested-state-machine - Parse JSON Output':
                {
                    'End': True,
                    'Catch': [{
                        'ErrorEquals': ['States.ALL'],
                        'ResultPath': '$.Error',
                        'Next': 'test-fail'
                    }],
                    'Parameters': {
                        'JsonString.$': '$.Output'
                    },
                    'Type': 'Task',
                    'Resource': {
                        'Fn::GetAtt': ['ParseJsonString859DB4F0', 'Arn']
                    },
                    'ResultPath': '$'
                },
                'test-fail': {
                    'Type': 'Fail'
                }
            }
        }]
    }

    test_stack = core.Stack(core.App(), 'test-stack')

    # Inner machine the fragment will invoke: a single Succeed state.
    inner_machine = sfn.StateMachine(
        test_stack,
        'test-state-machine',
        definition=sfn.Chain.start(sfn.Succeed(test_stack, 'Succeeded')))

    nested = emr_chains.NestedStateMachine(
        test_stack,
        'test-fragment',
        name='test-nested-state-machine',
        state_machine=inner_machine,
        input={'Key1': 'Value1'},
        fail_chain=sfn.Fail(test_stack, 'test-fail'))

    print_and_assert(expected, nested)
def __init__(self, scope: Construct, construct_id: str, env, **kwargs) -> None:
    """GuardDuty -> Network Firewall auto-block stack.

    EventBridge routes GuardDuty findings (via Security Hub) into two Step
    Functions workflows: one records the offending IP in DynamoDB and
    updates a Network Firewall rule group, the other forwards findings to
    Slack.
    """
    super().__init__(scope, construct_id, env=env, **kwargs)
    # Stateless rule group seeded with a placeholder drop rule; the
    # update Lambda below rewrites it with real blocked addresses.
    rg_property = network_fw.CfnRuleGroup.RuleGroupProperty(
        rule_variables=None,
        rules_source=network_fw.CfnRuleGroup.RulesSourceProperty(
            stateless_rules_and_custom_actions=network_fw.CfnRuleGroup.
            StatelessRulesAndCustomActionsProperty(stateless_rules=[
                network_fw.CfnRuleGroup.StatelessRuleProperty(
                    priority=10,
                    rule_definition=network_fw.CfnRuleGroup.
                    RuleDefinitionProperty(
                        actions=["aws:drop"],
                        match_attributes=network_fw.CfnRuleGroup.
                        MatchAttributesProperty(destinations=[
                            network_fw.CfnRuleGroup.AddressProperty(
                                address_definition="127.0.0.1/32")
                        ])))
            ])))
    nf_rule_group = network_fw.CfnRuleGroup(
        scope=self,
        id='GuardDutyNetworkFireWallRuleGroup',
        capacity=100,
        rule_group_name='guardduty-network-firewall',
        type='STATELESS',
        description='Guard Duty network firewall rule group',
        tags=[CfnTag(key='Name', value='cfn.rule-group.stack')],
        rule_group=rg_property)
    """
    https://docs.aws.amazon.com/eventbridge/latest/userguide/eb-rule-dlq.html#dlq-considerations
    """
    # Dead-letter queue for events EventBridge fails to deliver to the
    # state machines (see link above).
    dlq_statemachine = sqs.Queue(self,
                                 'DLQStateMachine',
                                 queue_name='dlq_state_machine')
    # Table of blocked host IPs, keyed by HostIp.
    guardduty_firewall_ddb = ddb.Table(
        scope=self,
        id=f'GuarddutyFirewallDDB',
        table_name='GuardDutyFirewallDDBTable',
        removal_policy=RemovalPolicy.DESTROY,
        partition_key=ddb.Attribute(name='HostIp',
                                    type=ddb.AttributeType.STRING),
        billing_mode=ddb.BillingMode.PAY_PER_REQUEST)
    """
    IAM role for ddb permission
    """
    # Shared execution role for both Lambdas: logs, DynamoDB, and
    # Network Firewall rule-group updates.
    nf_iam_role = iam.Role(
        self,
        'DDBRole',
        role_name=f'ddb-nf-role-{env.region}',
        assumed_by=iam.ServicePrincipal(service='lambda.amazonaws.com'))
    nf_iam_role.add_to_policy(
        iam.PolicyStatement(effect=iam.Effect.ALLOW,
                            resources=["arn:aws:logs:*:*:*"],
                            actions=[
                                "logs:CreateLogGroup",
                                "logs:CreateLogStream",
                                "logs:PutLogEvents"
                            ]))
    nf_iam_role.add_to_policy(
        iam.PolicyStatement(effect=iam.Effect.ALLOW,
                            resources=[
                                guardduty_firewall_ddb.table_arn,
                                f"{guardduty_firewall_ddb.table_arn}/*"
                            ],
                            actions=[
                                "dynamodb:PutItem", "dynamodb:GetItem",
                                "dynamodb:Scan"
                            ]))
    nf_iam_role.add_to_policy(
        iam.PolicyStatement(
            effect=iam.Effect.ALLOW,
            resources=[nf_rule_group.ref, f"{nf_rule_group.ref}/*"],
            actions=[
                "network-firewall:DescribeRuleGroup",
                "network-firewall:UpdateRuleGroup"
            ]))
    # Lambda that writes the finding's remote IP into the DynamoDB table.
    record_ip_in_db = _lambda.Function(
        self,
        'RecordIpInDB',
        function_name='record-ip-in-ddb',
        runtime=_lambda.Runtime.PYTHON_3_8,
        code=_lambda.Code.from_asset('lambda_fns'),
        handler='addIPToDDB.handler',
        environment=dict(ACLMETATABLE=guardduty_firewall_ddb.table_name),
        role=nf_iam_role)
    """
    https://docs.amazonaws.cn/en_us/eventbridge/latest/userguide/eb-event-patterns-content-based-filtering.html
    """
    # Task extracting only the fields needed from the raw finding event.
    record_ip_task = step_fn_task.LambdaInvoke(
        self,
        'RecordIpDDBTask',
        lambda_function=record_ip_in_db,
        payload=step_fn.TaskInput.from_object({
            "comment":
            "Relevant fields from the GuardDuty / Security Hub finding",
            "HostIp.$":
            "$.detail.findings[0].ProductFields.aws/guardduty/service/action/networkConnectionAction/remoteIpDetails/ipAddressV4",
            "Timestamp.$":
            "$.detail.findings[0].ProductFields.aws/guardduty/service/eventLastSeen",
            "FindingId.$": "$.id",
            "AccountId.$": "$.account",
            "Region.$": "$.region"
        }),
        result_path='$',
        payload_response_only=True)
    # Lambda that pushes the recorded IPs into the firewall rule group.
    firewall_update_rule = _lambda.Function(
        scope=self,
        id='GuardDutyUpdateNetworkFirewallRule',
        function_name='gurdduty-update-networkfirewal-rule-group',
        runtime=_lambda.Runtime.PYTHON_3_8,
        code=_lambda.Code.from_asset('lambda_fns'),
        handler='updateNetworkFireWall.handler',
        environment=dict(
            FIREWALLRULEGROUP=nf_rule_group.ref,
            RULEGROUPPRI='30000',
            CUSTOMACTIONNAME='GuardDutytoFirewall',
            CUSTOMACTIONVALUE='gurdduty-update-networkfirewal-rule-group'),
        role=nf_iam_role)
    firewall_update_rule_task = step_fn_task.LambdaInvoke(
        self,
        'FirewallUpdateRuleTask',
        lambda_function=firewall_update_rule,
        input_path='$',
        result_path='$',
        payload_response_only=True)
    # No-op branch when the IP was already known.
    firewall_no_update_job = step_fn.Pass(self, 'No Firewall change')
    # Shared terminal failure state for both retry/catch paths.
    notify_failure_job = step_fn.Fail(self,
                                      'NotifyFailureJob',
                                      cause='Any Failure',
                                      error='Unknown')
    send_to_slack = _lambda.Function(
        scope=self,
        id='SendAlertToSlack',
        function_name='gurdduty-networkfirewal-to-slack',
        runtime=_lambda.Runtime.PYTHON_3_8,
        handler="sendSMSToSlack.handler",
        code=_lambda.Code.from_asset('lambda_fns'))
    send_slack_task = step_fn_task.LambdaInvoke(
        scope=self,
        id='LambdaToSlackDemo',
        lambda_function=send_to_slack,
        input_path='$',
        result_path='$')
    is_new_ip = step_fn.Choice(self, "New IP?")
    is_block_succeed = step_fn.Choice(self, "Block sucessfully?")
    # Record IP -> if new, update firewall and verify -> Slack alert;
    # retries on task failure, any unhandled error goes to the Fail state.
    definition = step_fn.Chain \
        .start(record_ip_task
               .add_retry(errors=["States.TaskFailed"],
                          interval=Duration.seconds(2),
                          max_attempts=2)
               .add_catch(errors=["States.ALL"],
                          handler=notify_failure_job)) \
        .next(is_new_ip
              .when(step_fn.Condition.boolean_equals('$.NewIP', True),
                    firewall_update_rule_task
                    .add_retry(errors=["States.TaskFailed"],
                               interval=Duration.seconds(2),
                               max_attempts=2
                               )
                    .add_catch(errors=["States.ALL"],
                               handler=notify_failure_job)
                    .next(
                        is_block_succeed
                        .when(step_fn.Condition.boolean_equals('$.Result', False),
                              notify_failure_job)
                        .otherwise(send_slack_task)
                    )
                    )
              .otherwise(firewall_no_update_job)
              )
    guardduty_state_machine = step_fn.StateMachine(
        self,
        'GuarddutyStateMachine',
        definition=definition,
        timeout=Duration.minutes(5),
        state_machine_name='guardduty-state-machine')
    # Route findings that carry a remote IPv4 address to the blocking
    # workflow. NOTE(review): account id '123456789012' looks like a
    # placeholder — confirm before deployment.
    event.Rule(
        scope=self,
        id='EventBridgeCatchIPv4',
        description="Security Hub - GuardDuty findings with remote IP",
        rule_name='guardduty-catch-ipv4',
        event_pattern=event.EventPattern(
            account=['123456789012'],
            detail_type=["GuardDuty Finding"],
            source=['aws.securityhub'],
            detail={
                "findings": {
                    "ProductFields": {
                        "aws/guardduty/service/action/networkConnectionAction/remoteIpDetails/ipAddressV4":
                        [{
                            "exists": True
                        }]
                    }
                }
            }),
        targets=[
            event_target.SfnStateMachine(
                machine=guardduty_state_machine,
                dead_letter_queue=dlq_statemachine)
        ])
    """
    Send other findings to slack
    """
    send_finding_to_slack = _lambda.Function(
        self,
        'SendFindingToSlack',
        function_name='send-finding-to-slack',
        runtime=_lambda.Runtime.PYTHON_3_8,
        handler="sendFindingToSlack.handler",
        code=_lambda.Code.from_asset('lambda_fns'))
    send_findings_task = step_fn_task.LambdaInvoke(
        self,
        'SendFindingToSlackTask',
        lambda_function=send_finding_to_slack,
        payload=step_fn.TaskInput.from_object({
            "comment":
            "Others fields from the GuardDuty / Security Hub finding",
            "severity.$": "$.detail.findings[0].Severity.Label",
            "Account_ID.$": "$.account",
            "Finding_ID.$": "$.id",
            "Finding_Type.$": "$.detail.findings[0].Types",
            "Region.$": "$.region",
            "Finding_description.$": "$.detail.findings[0].Description"
        }),
        result_path='$')
    slack_failure_job = step_fn.Fail(self,
                                     'SlackNotifyFailureJob',
                                     cause='Any Failure',
                                     error='Unknown')
    # Single-task workflow: forward the finding summary to Slack.
    finding_definition = step_fn.Chain \
        .start(send_findings_task
               .add_retry(errors=["States.TaskFailed"],
                          interval=Duration.seconds(2),
                          max_attempts=2)
               .add_catch(errors=["States.ALL"],
                          handler=slack_failure_job))
    sechub_findings_state_machine = step_fn.StateMachine(
        self,
        'SecHubFindingsStateMachine',
        definition=finding_definition,
        timeout=Duration.minutes(5),
        state_machine_name='sechub-finding-state-machine')
    # Route remaining medium/high severity findings to the Slack workflow.
    event.Rule(scope=self,
               id='EventBridgeFindings',
               description="Security Hub - GuardDuty findings others",
               rule_name='others-findings',
               event_pattern=event.EventPattern(
                   account=['123456789012'],
                   source=['aws.securityhub'],
                   detail_type=['Security Hub Findings - Imported'],
                   detail={"severity": [5, 8]}),
               targets=[
                   event_target.SfnStateMachine(
                       machine=sechub_findings_state_machine,
                       dead_letter_queue=dlq_statemachine)
               ])
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None: super().__init__(scope, id, **kwargs) # Lets create couple of instances to test): vpc = _ec2.Vpc(self, "abacVPC", cidr="10.13.0.0/21", max_azs=2, nat_gateways=0, subnet_configuration=[ _ec2.SubnetConfiguration( name="pubSubnet", cidr_mask=24, subnet_type=_ec2.SubnetType.PUBLIC) ]) core.Tag.add(vpc, key="ServiceProvider", value="KonStone", include_resource_types=[]) weak_sg = _ec2.SecurityGroup( self, "web_sec_grp", vpc=vpc, description="Allow internet access from the world", allow_all_outbound=True) # vpc_cidr_block # weak_sg.add_ingress_rule(_ec2.Peer.any_ipv4(), weak_sg.add_ingress_rule(_ec2.Peer.ipv4(vpc.vpc_cidr_block), _ec2.Port.tcp(22), "Allow SSH access from the VPC Only.") # We are using the latest AMAZON LINUX AMI # Benefit of having SSM Agent pre-installed ami_id = _ec2.AmazonLinuxImage(generation=_ec2.AmazonLinuxGeneration. AMAZON_LINUX_2).get_image(self).image_id # https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_iam/Role.html instace_profile_role = _iam.Role( self, 'ec2ssmroleid', assumed_by=_iam.ServicePrincipal('ec2.amazonaws.com'), role_name="instace_profile_role") instace_profile_role.add_managed_policy( _iam.ManagedPolicy.from_aws_managed_policy_name( 'AmazonSSMManagedInstanceCore')) instance_profile_role_additional_perms = _iam.PolicyStatement( effect=_iam.Effect.ALLOW, resources=[ "arn:aws:logs:*:*:*", ], actions=["logs:Create*", "logs:PutLogEvents"]) instance_profile_role_additional_perms.sid = "PutBucketPolicy" instace_profile_role.add_to_policy( instance_profile_role_additional_perms) inst_profile_01 = _iam.CfnInstanceProfile( self, "instProfile01Id", roles=[instace_profile_role.role_name], ) # Let us bootstrap the server with the required agents try: with open("./bootstrap_scripts/install_agents.sh", mode='rb') as file: bootstrap_data = file.read() except OSError: print('Failed to get UserData script') install_agents = _ec2.UserData.for_linux() 
install_agents.add_commands(str(bootstrap_data, 'utf-8')) # The EC2 Instance to monitor for failed SSH Logins ssh_monitored_inst_01 = _ec2.CfnInstance( self, "sshMonitoredInstance01", image_id=ami_id, instance_type="t2.micro", monitoring=False, tags=[{ "key": "ServiceProvider", "value": "KonStone" }], iam_instance_profile=inst_profile_01.ref, network_interfaces=[{ "deviceIndex": "0", "associatePublicIpAddress": True, "subnetId": vpc.public_subnets[0].subnet_id, "groupSet": [weak_sg.security_group_id] }], #https: //github.com/aws/aws-cdk/issues/3419 user_data=core.Fn.base64(install_agents.render()), ) """ linux_ami = _ec2.GenericLinuxImage({ "cn-northwest-1": "ami-0f62e91915e16cfc2","eu-west-1": "ami-12345678"}) ssh_monitored_inst_01_02 = _ec2.Instance(self, "monitoredInstance02", instance_type=_ec2.InstanceType(instance_type_identifier="t2.micro"), instance_name="monitoredInstance02", machine_image=linux_ami, vpc=vpc, security_group=[weak_sg.security_group_id], # vpc_subnets=_ec2.SubnetSelection(subnet_type=_ec2.SubnetType.PUBLIC) vpc_subnets=vpc.public_subnets[0].subnet_id, # user_data=_ec2.UserData.custom(t_user_data) ) """ # The log group name to store logs info_sec_ops_log_group = _logs.LogGroup( self, "infoSecOpsLogGroupId", log_group_name=(f"/Mystique/InfoSec/Automation/" f"{ssh_monitored_inst_01.ref}"), retention=_logs.RetentionDays.ONE_WEEK) # Defines an AWS Lambda resource with open("lambda_src/quarantine_ec2_instance.py", encoding="utf8") as fp: quarantine_ec2_instance_fn_handler_code = fp.read() quarantine_ec2_instance_fn = _lambda.Function( self, id='quarantineEc2InstanceFnId', function_name="quarantine_ec2_instance", runtime=_lambda.Runtime.PYTHON_3_7, code=_lambda.InlineCode(quarantine_ec2_instance_fn_handler_code), handler='index.lambda_handler', timeout=core.Duration.seconds(5)) quarantine_ec2_instance_fn_perms = _iam.PolicyStatement( effect=_iam.Effect.ALLOW, resources=[ "*", ], actions=[ "ec2:RevokeSecurityGroupIngress", 
"ec2:DescribeSecurityGroupReferences", "ec2:RevokeSecurityGroupEgress", "ec2:ApplySecurityGroupsToClientVpnTargetNetwork", "ec2:DescribeSecurityGroups", "ec2:CreateSecurityGroup", "ec2:DescribeInstances", "ec2:CreateTags", "ec2:StopInstances", "ec2:CreateVolume", "ec2:CreateSnapshots", "ec2:CreateSnapshot", "ec2:DescribeSnapshots", "ec2:ModifyInstanceAttribute" ]) quarantine_ec2_instance_fn_perms.sid = "AllowLambdaToQuarantineEC2" quarantine_ec2_instance_fn.add_to_role_policy( quarantine_ec2_instance_fn_perms) info_sec_ops_topic = _sns.Topic(self, "infoSecOpsTopicId", display_name="InfoSecTopic", topic_name="InfoSecOpsTopic") # Ref: https://docs.aws.amazon.com/cdk/api/latest/docs/aws-stepfunctions-readme.html ############################################################################### ################# STEP FUNCTIONS EXPERIMENTAL CODE - UNSTABLE ################# ############################################################################### quarantine_ec2_instance_task = _sfn.Task( self, "Quarantine EC2 Instance", task=_tasks.InvokeFunction(quarantine_ec2_instance_fn), result_path="$") notify_secops_task = _sfn.Task( self, "Notify InfoSecOps", task=_tasks.PublishToTopic( info_sec_ops_topic, integration_pattern=_sfn.ServiceIntegrationPattern. 
FIRE_AND_FORGET, message=_sfn.TaskInput.from_data_at("$.message"), subject="SSH Error Response Notification")) ssh_error_response_failure = _sfn.Fail( self, "SSH Error Response Actions Failed", cause="All Response Actions were NOT completed", error="Check Logs") ssh_error_response_success = _sfn.Succeed( self, "SSH Error Response Actions Succeeded", comment="All Response Action Completed Successfully", ) ssh_error_response_sfn_definition = quarantine_ec2_instance_task\ .next(notify_secops_task\ .next(_sfn.Choice(self, "SSH Errors Response Complete?")\ .when(_sfn.Condition.number_equals("$.SdkHttpMetadata.HttpStatusCode", 200),ssh_error_response_success)\ .when(_sfn.Condition.not_( _sfn.Condition.number_equals("$.SdkHttpMetadata.HttpStatusCode", 200)), ssh_error_response_failure)\ .otherwise(ssh_error_response_failure) ) ) ssh_error_response_statemachine = _sfn.StateMachine( self, "stateMachineId", definition=ssh_error_response_sfn_definition, timeout=core.Duration.minutes(5)) ############################################################################### ################# STEP FUNCTIONS EXPERIMENTAL CODE - UNSTABLE ################# ############################################################################### # LAMBDA TO TRIGGER STATE MACHINE - since state cannot be invoked by SNS with open("lambda_src/trigger_state_machine.py", encoding="utf8") as fp: trigger_state_machine_fn_handler_code = fp.read() trigger_state_machine_fn = _lambda.Function( self, id='sshErrorResponseFnId', function_name="trigger_ssh_error_response_state_machine_fn", runtime=_lambda.Runtime.PYTHON_3_7, code=_lambda.InlineCode(trigger_state_machine_fn_handler_code), # code=_lambda.Code.asset("lambda_src/is_policy_permissive.py"), # code=_lambda.Code.asset('lambda_src'), # code=_lambda.InlineCode(code_body), handler='index.lambda_handler', timeout=core.Duration.seconds(5), environment={ "STATE_MACHINE_ARN": f"{ssh_error_response_statemachine.state_machine_arn}", }) 
trigger_state_machine_fn_perms = _iam.PolicyStatement( effect=_iam.Effect.ALLOW, resources=[ f"{ssh_error_response_statemachine.state_machine_arn}", ], actions=["states:StartExecution"]) trigger_state_machine_fn_perms.sid = "PutBucketPolicy" trigger_state_machine_fn.add_to_role_policy( trigger_state_machine_fn_perms) """ version = trigger_state_machine_fn.add_version(name=datetime.now().isoformat()) trigger_state_machine_fn_alias = _lambda.Alias(self, 'lmdaAliasId', alias_name='MystiqueTestAlias', version=version ) """ # Lets add permission to SNS to trigger our lambda function trigger_lambda_perms = _iam.PolicyStatement( effect=_iam.Effect.ALLOW, resources=[ trigger_state_machine_fn.function_arn, ], actions=[ "lambda:InvokeFunction", ]) trigger_lambda_perms.sid = "TriggerLambaFunction" # info_sec_ops_topic.add_to_resource_policy( trigger_lambda_perms ) # Subscribe InfoSecOps Email to topic info_sec_ops_topic.add_subscription( _subs.EmailSubscription(global_args.INFO_SEC_OPS_EMAIL)) # info_sec_ops_topic.add_subscription(_subs.LambdaSubscription(trigger_state_machine_fn)) trigger_state_machine_fn_alarm = trigger_state_machine_fn.metric_all_errors( ).create_alarm( self, "fn-error-alarm", threshold=5, alarm_name="trigger_state_machine_fn_error_alarm", evaluation_periods=5, period=core.Duration.minutes(1), ) subscribe_trigger_state_machine_fn_to_logs = _logs.SubscriptionFilter( self, "sshErrorLogSubscriptionId", log_group=info_sec_ops_log_group, destination=_logs_destination.LambdaDestination( trigger_state_machine_fn), filter_pattern=_logs.FilterPattern.space_delimited( "Mon", "day", "timestamp", "ip", "id", "status", "...").where_string("status", "=", "Invalid"), ) # https://pypi.org/project/aws-cdk.aws-logs/ # We are creating three filter # tooManySshDisconnects, invalidSshUser and invalidSshKey: # When a user tries to SSH with invalid username the next line is logged in the SSH log file: # Apr 20 02:39:35 ip-172-31-63-56 sshd[17136]: Received disconnect from 
xxx.xxx.xxx.xxx: 11: [preauth] too_many_ssh_disconnects_metric = _cloudwatch.Metric( namespace=f"{global_args.OWNER}", metric_name="tooManySshDisconnects") too_many_ssh_disconnects_filter = _logs.MetricFilter( self, "tooManySshDisconnectsFilterId", log_group=info_sec_ops_log_group, metric_namespace=too_many_ssh_disconnects_metric.namespace, metric_name=too_many_ssh_disconnects_metric.metric_name, filter_pattern=_logs.FilterPattern.space_delimited( "Mon", "day", "timestamp", "ip", "id", "msg1", "msg2", "...").where_string("msg2", "=", "disconnect"), metric_value="1") invalid_ssh_user_metric = _cloudwatch.Metric( namespace=f"{global_args.OWNER}", metric_name="invalidSshUser", ) invalid_ssh_user_filter = _logs.MetricFilter( self, "invalidSshUserFilterId", log_group=info_sec_ops_log_group, metric_namespace=invalid_ssh_user_metric.namespace, metric_name=invalid_ssh_user_metric.metric_name, filter_pattern=_logs.FilterPattern.space_delimited( "Mon", "day", "timestamp", "ip", "id", "status", "...").where_string("status", "=", "Invalid"), metric_value="1") invalid_ssh_key_metric = _cloudwatch.Metric( namespace=f"{global_args.OWNER}", metric_name="invalidSshKey") invalid_ssh_key_filter = _logs.MetricFilter( self, "invalidSshKeyFilterId", log_group=info_sec_ops_log_group, metric_namespace=invalid_ssh_key_metric.namespace, metric_name=invalid_ssh_key_metric.metric_name, filter_pattern=_logs.FilterPattern.space_delimited( "Mon", "day", "timestamp", "ip", "id", "msg1", "msg2", "...").where_string("msg1", "=", "Connection").where_string( "msg2", "=", "closed"), metric_value="1") # Now let us create alarms # alarm is raised there are more than 5(threshold) of the measured metrics in two(datapoint) of the last three seconds(evaluation): # Period=60Seconds, Eval=3, Threshold=5 too_many_ssh_disconnects_alarm = _cloudwatch.Alarm( self, "tooManySshDisconnectsAlarmId", alarm_name="too_many_ssh_disconnects_alarm", alarm_description= "The number disconnect requests is greater then 5, even 
1 time in 3 minutes", metric=too_many_ssh_disconnects_metric, actions_enabled=True, period=core.Duration.minutes(1), threshold=5, evaluation_periods=3, datapoints_to_alarm=1, statistic="sum", comparison_operator=_cloudwatch.ComparisonOperator. GREATER_THAN_OR_EQUAL_TO_THRESHOLD) invalid_ssh_user_alarm = _cloudwatch.Alarm( self, "invalidSshUserAlarmId", alarm_name="too_many_invalid_ssh_users_alarm", alarm_description= "The number of invalid ssh users connecting is greater then 5, even 1 time in 3 minutes", metric=invalid_ssh_user_metric, actions_enabled=True, period=core.Duration.minutes(1), threshold=5, evaluation_periods=3, datapoints_to_alarm=1, statistic="sum", comparison_operator=_cloudwatch.ComparisonOperator. GREATER_THAN_THRESHOLD) invalid_ssh_user_alarm.add_alarm_action( _cloudwatch_actions.SnsAction(info_sec_ops_topic)) invalid_ssh_key_alarm = _cloudwatch.Alarm( self, "invalidSshKeyAlarmId", alarm_name="too_many_invalid_ssh_key_alarm", alarm_description= "The number of invalid ssh keys connecting is greater then 5, even 1 time in 3 minutes", metric=invalid_ssh_key_metric, actions_enabled=True, period=core.Duration.minutes(1), threshold=5, evaluation_periods=3, datapoints_to_alarm=1, statistic="sum", comparison_operator=_cloudwatch.ComparisonOperator. GREATER_THAN_OR_EQUAL_TO_THRESHOLD) invalid_ssh_key_alarm.add_alarm_action( _cloudwatch_actions.SnsAction(info_sec_ops_topic)) ########################################### ################# OUTPUTS ################# ########################################### output0 = core.CfnOutput( self, "SecuirtyAutomationFrom", value=f"{global_args.SOURCE_INFO}", description= "To know more about this automation stack, check out our github page." 
) output1_1 = core.Fn.get_att( logical_name_of_resource="sshMonitoredInstance01", attribute_name="PublicIp") output1 = core.CfnOutput(self, "MonitoredInstance", value=output1_1.to_string(), description="Web Server Public IP to attack") output2 = core.CfnOutput( self, "SSHAlarms", value= (f"https://console.aws.amazon.com/cloudwatch/home?region=" f"{core.Aws.REGION}" f"#/configuration/" f"#alarmsV2:?search=ssh&alarmStateFilter=ALL&alarmTypeFilter=ALL" ), description="Check out the cloudwatch Alarms") output3 = core.CfnOutput( self, "SubscribeToNotificationTopic", value=(f"https://console.aws.amazon.com/sns/v3/home?" f"{core.Aws.REGION}" f"#/topic/" f"{info_sec_ops_topic.topic_arn}"), description= "Add your email to subscription and confirm subscription") output_test_1 = core.CfnOutput( self, "ToGenInvalidKeyErrors", value= (f"for i in {{1..30}}; do ssh -i $RANDOM ec2-user@{output1_1.to_string()}; sleep 2; done &" ), description= "Generates random key names and connects to server 30 times over 60 seconds" ) output_test_2 = core.CfnOutput( self, "ToGenInvalidUserErrors", value= (f"for i in {{1..30}}; do ssh ec2-user$RANDOM@{output1_1.to_string()}; sleep 2; done &" ), description= "Generates random user names and connects to server 30 times over 60 seconds" ) """
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    """Provision the single-table, saga-pattern booking stack.

    Builds the DynamoDB bookings table, the booking/cancellation Lambda
    functions for flights, hotel and payment, a Step Functions saga that
    reserves, pays for and confirms a trip (compensating on failure),
    and an API Gateway fronting a Lambda that starts the saga.
    """
    super().__init__(scope, id, **kwargs)

    ###
    # DynamoDB Table
    ###
    # Single-table design: Flight, Hotel and Rental Car bookings share
    # one table. For more help with single table DB structures see
    # https://www.dynamodbbook.com/
    #   pk - the trip_id e.g. 1234
    #   sk - bookingtype#booking_id e.g. HOTEL#345634, FLIGHT#574576, PAYMENT#45245
    bookings_table = dynamo_db.Table(
        self, "Bookings",
        partition_key=dynamo_db.Attribute(name="pk", type=dynamo_db.AttributeType.STRING),
        sort_key=dynamo_db.Attribute(name="sk", type=dynamo_db.AttributeType.STRING),
    )

    ###
    # Lambda Functions
    ###
    # Booking and cancellation handlers for the 3 services, all wired to
    # the table above: 1) Flights  2) Hotel  3) Payment
    def _booking_fn(lambda_id: str, handler: str):
        # Thin wrapper so every handler is created against the same table.
        return self.create_lambda(scope=self, lambda_id=lambda_id,
                                  handler=handler, table=bookings_table)

    # 1) Flights
    reserve_flight_fn = _booking_fn("reserveFlightLambdaHandler", 'flights/reserveFlight.handler')
    confirm_flight_fn = _booking_fn("confirmFlightLambdaHandler", 'flights/confirmFlight.handler')
    cancel_flight_fn = _booking_fn("cancelFlightLambdaHandler", 'flights/cancelFlight.handler')

    # 2) Hotel
    reserve_hotel_fn = _booking_fn("reserveHotelLambdaHandler", 'hotel/reserveHotel.handler')
    confirm_hotel_fn = _booking_fn("confirmHotelLambdaHandler", 'hotel/confirmHotel.handler')
    cancel_hotel_fn = _booking_fn("cancelHotelLambdaHandler", 'hotel/cancelHotel.handler')

    # 3) Payment For Holiday
    take_payment_fn = _booking_fn("takePaymentLambdaHandler", 'payment/takePayment.handler')
    refund_payment_fn = _booking_fn("refundPaymentLambdaHandler", 'payment/refundPayment.handler')

    ###
    # Saga Pattern Step Function
    ###
    # Follows a strict order:
    #   1) Reserve Flights and Hotel
    #   2) Take Payment
    #   3) Confirm Flight and Hotel booking

    # Terminal states of the saga.
    booking_succeeded = step_fn.Succeed(self, 'We have made your booking!')
    booking_failed = step_fn.Fail(self, "Sorry, We Couldn't make the booking")

    # 1) Reserve Flights and Hotel.
    # Compensations are declared first so each forward step can catch
    # into its rollback, and each rollback hands off to the next one.
    cancel_hotel_reservation = step_fn.Task(
        self, 'CancelHotelReservation',
        task=step_fn_tasks.InvokeFunction(cancel_hotel_fn),
        result_path='$.CancelHotelReservationResult',
    ).add_retry(max_attempts=3).next(booking_failed)

    reserve_hotel = step_fn.Task(
        self, 'ReserveHotel',
        task=step_fn_tasks.InvokeFunction(reserve_hotel_fn),
        result_path='$.ReserveHotelResult',
    ).add_catch(cancel_hotel_reservation, result_path="$.ReserveHotelError")

    cancel_flight_reservation = step_fn.Task(
        self, 'CancelFlightReservation',
        task=step_fn_tasks.InvokeFunction(cancel_flight_fn),
        result_path='$.CancelFlightReservationResult',
    ).add_retry(max_attempts=3).next(cancel_hotel_reservation)

    reserve_flight = step_fn.Task(
        self, 'ReserveFlight',
        task=step_fn_tasks.InvokeFunction(reserve_flight_fn),
        result_path='$.ReserveFlightResult',
    ).add_catch(cancel_flight_reservation, result_path="$.ReserveFlightError")

    # 2) Take Payment.
    refund_payment = step_fn.Task(
        self, 'RefundPayment',
        task=step_fn_tasks.InvokeFunction(refund_payment_fn),
        result_path='$.RefundPaymentResult',
    ).add_retry(max_attempts=3).next(cancel_flight_reservation)

    take_payment = step_fn.Task(
        self, 'TakePayment',
        task=step_fn_tasks.InvokeFunction(take_payment_fn),
        result_path='$.TakePaymentResult',
    ).add_catch(refund_payment, result_path="$.TakePaymentError")

    # 3) Confirm Flight and Hotel Booking.
    confirm_hotel = step_fn.Task(
        self, 'ConfirmHotelBooking',
        task=step_fn_tasks.InvokeFunction(confirm_hotel_fn),
        result_path='$.ConfirmHotelBookingResult',
    ).add_catch(refund_payment, result_path="$.ConfirmHotelBookingError")

    confirm_flight = step_fn.Task(
        self, 'ConfirmFlight',
        task=step_fn_tasks.InvokeFunction(confirm_flight_fn),
        result_path='$.ConfirmFlightResult',
    ).add_catch(refund_payment, result_path="$.ConfirmFlightError")

    # Happy path: reserve -> pay -> confirm -> succeed.
    definition = (step_fn.Chain
                  .start(reserve_hotel)
                  .next(reserve_flight)
                  .next(take_payment)
                  .next(confirm_hotel)
                  .next(confirm_flight)
                  .next(booking_succeeded))

    saga = step_fn.StateMachine(self, 'BookingSaga',
                                definition=definition,
                                timeout=core.Duration.minutes(5))

    # AWS Lambda resource connected to API Gateway that kicks off the
    # step function saga.
    saga_lambda = _lambda.Function(
        self, "sagaLambdaHandler",
        runtime=_lambda.Runtime.NODEJS_12_X,
        handler="sagaLambda.handler",
        code=_lambda.Code.from_asset("lambdas"),
        environment={
            'statemachine_arn': saga.state_machine_arn
        },
    )
    saga.grant_start_execution(saga_lambda)

    # API Gateway REST API resource backed by the saga-starting Lambda.
    api_gw.LambdaRestApi(self, 'SagaPatternSingleTable', handler=saga_lambda)
def __init__(self, app: core.App, id: str, **kwargs) -> None:
    """Provision a Step Functions workflow that runs a SageMaker batch
    transform job and polls it until completion.

    Creates two Lambda functions — one that submits the transform job
    and one that checks its status — plus a state machine that submits,
    waits one minute, checks, and loops until the job reports
    ``Completed`` (fetch final status) or ``Failed`` (terminal Fail).
    """
    super().__init__(app, id, **kwargs)

    # Both Lambda role policies below are scoped to the transform jobs
    # this stack submits; build the ARN once instead of twice.
    transform_job_arn = (
        f"arn:aws:sagemaker:{my_region}:{my_acc_id}"
        f":transform-job/{transform_job_name}*"
    )

    # Lambda that submits the SageMaker batch transform job.
    with open("lambda-submit.py", encoding="utf8") as fp:
        lambda_submit_code = fp.read()

    submit_fn = lambda_.Function(
        self, "submitsmbatch",
        code=lambda_.InlineCode(lambda_submit_code),
        handler="index.lambda_handler",
        timeout=core.Duration.seconds(300),
        runtime=lambda_.Runtime.PYTHON_3_7,
        environment={
            "transform_job_name": transform_job_name,
            "model_name": model_name,
            "max_concurrent": max_concurrent,
            "max_payload_size": max_payload_size,
            "s3_uri_in": s3_uri_in,
            "s3_uri_out": s3_uri_out,
            "instance_type": instance_type,
            "instance_count": instance_count,
        },
    )
    # Allow the submit Lambda to create the transform job.
    submit_fn.add_to_role_policy(aws_iam.PolicyStatement(
        actions=['sagemaker:CreateTransformJob'],
        resources=[transform_job_arn],
    ))

    # Lambda that polls the transform job status.
    with open("lambda-check.py", encoding="utf8") as fp:
        lambda_check_code = fp.read()

    check_fn = lambda_.Function(
        self, "checksmbatch",
        code=lambda_.InlineCode(lambda_check_code),
        handler="index.lambda_handler",
        timeout=core.Duration.seconds(300),
        runtime=lambda_.Runtime.PYTHON_3_7,
        environment={
            "model_name": model_name,  # CHANGE TO YOUR ENDPOINT NAME!!
            "content_type": "text/csv",
        },
    )
    # Allow the check Lambda to describe the transform job.
    check_fn.add_to_role_policy(aws_iam.PolicyStatement(
        actions=['sagemaker:DescribeTransformJob'],
        resources=[transform_job_arn],
    ))

    # Define state machine: submit -> wait -> check -> (loop | fail | done).
    submit_job = sfn.Task(
        self, "Submit Job",
        task=sfn_tasks.InvokeFunction(submit_fn),
    )
    wait_x = sfn.Wait(
        self, "Wait 1 minute",
        time=sfn.WaitTime.duration(core.Duration.minutes(1)),
    )
    get_status = sfn.Task(
        self, "Get Job Status",
        task=sfn_tasks.InvokeFunction(check_fn),
    )
    is_complete = sfn.Choice(
        self, "Job Complete?"
    )
    job_failed = sfn.Fail(
        self, "Job Failed",
        cause="AWS Batch Job Failed",
        error="DescribeJob returned FAILED",
    )
    final_status = sfn.Task(
        self, "Get Final Job Status",
        task=sfn_tasks.InvokeFunction(check_fn),
    )

    definition = submit_job\
        .next(wait_x)\
        .next(get_status)\
        .next(is_complete
              .when(sfn.Condition.string_equals(
                  "$.status", "Failed"), job_failed)
              .when(sfn.Condition.string_equals(
                  "$.status", "Completed"), final_status)
              .otherwise(wait_x))

    sfn.StateMachine(
        self, "SMbatchInference",
        definition=definition,
    )