def __init__(self, scope: core.Construct, id: str, *,
             emr_step: emr_code.EMRStep,
             cluster_id: str,
             result_path: Optional[str] = None,
             output_path: Optional[str] = None,
             fail_chain: Optional[sfn.IChainable] = None,
             wait_for_step_completion: bool = True):
    super().__init__(scope, id)

    override_step_args = emr_lambdas.OverrideStepArgsBuilder.get_or_build(self)

    override_step_args_task = sfn.Task(
        self, f'{emr_step.name} - Override Args',
        result_path=f'$.{id}ResultArgs',
        task=sfn_tasks.InvokeFunction(
            override_step_args,
            payload={
                'ExecutionInput': sfn.TaskInput.from_context_at('$$.Execution.Input').value,
                'StepName': emr_step.name,
                'Args': emr_step.args
            }))

    resolved_step = emr_step.resolve(self)
    resolved_step['HadoopJarStep']['Args'] = sfn.TaskInput.from_data_at(
        f'$.{id}ResultArgs').value

    integration_pattern = sfn.ServiceIntegrationPattern.SYNC if wait_for_step_completion \
        else sfn.ServiceIntegrationPattern.FIRE_AND_FORGET

    add_step_task = sfn.Task(
        self, emr_step.name,
        output_path=output_path,
        result_path=result_path,
        task=emr_tasks.EmrAddStepTask(
            cluster_id=cluster_id,
            step=resolved_step,
            integration_pattern=integration_pattern))

    if fail_chain:
        override_step_args_task.add_catch(fail_chain, errors=['States.ALL'], result_path='$.Error')
        add_step_task.add_catch(fail_chain, errors=['States.ALL'], result_path='$.Error')

    override_step_args_task.next(add_step_task)

    self._start = override_step_args_task
    self._end = add_step_task

def __init__(self, scope: core.App, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    pass_through_lambda = _lambda.Function(
        self, 'PassThroughLambda',
        runtime=_lambda.Runtime.PYTHON_3_7,
        code=_lambda.Code.asset('lambda'),
        handler='pass_through_lambda.handler')

    loop_count_lambda = _lambda.Function(
        self, 'LoopCountLambda',
        runtime=_lambda.Runtime.PYTHON_3_7,
        code=_lambda.Code.asset('lambda'),
        handler='loop_count_lambda.handler')

    start_state_machine = sfn.Task(
        self, "Start CodeBuild Lambda",
        task=sfn_tasks.InvokeFunction(pass_through_lambda))

    wait_x = sfn.Wait(
        self, "Wait X Seconds",
        time=sfn.WaitTime.seconds_path('$.wait_time'),
    )

    get_state_machine_status = sfn.Task(
        self, "Get Build Status",
        task=sfn_tasks.InvokeFunction(loop_count_lambda))

    is_complete = sfn.Choice(self, "Job Complete?")

    state_machine_failed = sfn.Fail(
        self, "Build Failed",
        cause="AWS Batch Job Failed",
        error="DescribeJob returned FAILED")

    state_machine_success = sfn.Pass(self, "Build Success")

    definition = start_state_machine\
        .next(wait_x)\
        .next(get_state_machine_status)\
        .next(is_complete
              .when(sfn.Condition.string_equals("$.status", "FAILED"),
                    state_machine_failed)
              .when(sfn.Condition.string_equals("$.status", "SUCCEEDED"),
                    state_machine_success)
              .otherwise(wait_x))

    sfn.StateMachine(
        self, "StateMachine",
        definition=definition,
        timeout=core.Duration.seconds(60),
    )

def __init__(self, app: cdk.App, id: str, **kwargs) -> None:
    super().__init__(app, id, **kwargs)

    submit_job_activity = sfn.Activity(self, "SubmitJob")
    check_job_activity = sfn.Activity(self, "CheckJob")

    submit_job = sfn.Task(
        self, "Submit Job",
        task=sfn_tasks.InvokeActivity(submit_job_activity),
        result_path="$.guid",
    )
    wait_x = sfn.Wait(
        self, "Wait X Seconds",
        time=sfn.WaitTime.seconds_path('$.wait_time'),
    )
    get_status = sfn.Task(
        self, "Get Job Status",
        task=sfn_tasks.InvokeActivity(check_job_activity),
        input_path="$.guid",
        result_path="$.status",
    )
    is_complete = sfn.Choice(self, "Job Complete?")
    job_failed = sfn.Fail(
        self, "Job Failed",
        cause="AWS Batch Job Failed",
        error="DescribeJob returned FAILED",
    )
    final_status = sfn.Task(
        self, "Get Final Job Status",
        task=sfn_tasks.InvokeActivity(check_job_activity),
        input_path="$.guid",
    )

    definition = submit_job\
        .next(wait_x)\
        .next(get_status)\
        .next(is_complete
              .when(sfn.Condition.string_equals("$.status", "FAILED"), job_failed)
              .when(sfn.Condition.string_equals("$.status", "SUCCEEDED"), final_status)
              .otherwise(wait_x))

    sfn.StateMachine(
        self, "StateMachine",
        definition=definition,
        timeout=cdk.Duration.seconds(30),
    )

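# The two activities above are serviced by external workers that poll Step
# Functions for work. A minimal worker sketch using boto3; the activity ARN,
# polling cadence, and the submit_batch_job helper are assumptions, not part
# of the stack above:
import json
import boto3

sfn_client = boto3.client('stepfunctions')

def poll_submit_job_activity(activity_arn: str) -> None:
    # Long-polls for a task; taskToken is empty when nothing is queued.
    task = sfn_client.get_activity_task(activityArn=activity_arn,
                                        workerName='submit-worker')
    if not task.get('taskToken'):
        return
    try:
        payload = json.loads(task['input'])
        guid = submit_batch_job(payload)  # hypothetical helper
        sfn_client.send_task_success(taskToken=task['taskToken'],
                                     output=json.dumps({'guid': guid}))
    except Exception as exc:
        sfn_client.send_task_failure(taskToken=task['taskToken'],
                                     error='SubmitFailed', cause=str(exc))
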
def _lambda_glue_crawler_task(self):
    root_path = Path(os.path.dirname(os.path.abspath(__file__)))
    lambda_handler = root_path.joinpath('lambdas', 'trigger_glue_crawler').as_posix()
    func = lambda_.Function(
        self, "TriggerGlueCrawlerLambdaHandler",
        handler="lambda.lambda_handler",
        code=lambda_.AssetCode(lambda_handler),
        environment={"crawlerName": f"{self.glue_crawler.name}"},
        initial_policy=[
            iam.PolicyStatement(
                actions=["glue:StartCrawler"],
                resources=["*"],
            ),
        ],
        timeout=core.Duration.seconds(30),
        runtime=lambda_.Runtime.PYTHON_3_7,
    )

    # turn the lambda into a stepfunction task so we can use it in our state machine
    task = sfn.Task(
        self, "TriggerGlueCrawlerLambda",
        task=sfnt.InvokeFunction(func),
    )
    return task

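# The handler asset under lambdas/trigger_glue_crawler is not shown here; a
# minimal sketch of what it might contain (the exact file contents are an
# assumption):
import os
import boto3

glue = boto3.client('glue')

def lambda_handler(event, context):
    # The crawler name is injected via the construct's `environment` mapping above.
    crawler_name = os.environ['crawlerName']
    glue.start_crawler(Name=crawler_name)
    return {'crawler': crawler_name, 'status': 'STARTED'}
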
def _emr_spark_step_task(self):
    # Add an EMR Step to run our pyspark job; an asset with our application will be
    # created and referenced in the job definition
    root_path = Path(os.path.dirname(os.path.abspath(__file__)))
    pyspark_script = root_path.joinpath('pyspark', 'example.py').as_posix()
    pyspark_example_asset = s3_assets.Asset(self, "PythonScript",
                                            path=pyspark_script)

    sample_spark_step = sfn.Task(
        self, "RunSparkExample",
        task=sfnt.EmrAddStep(
            # the concrete ClusterId will be picked up from the current state of the state machine
            cluster_id=sfn.Data.string_at("$.ClusterId"),
            name="SparkExample",
            # `command-runner.jar` is a jar from AWS that can be used to execute generic
            # commands (like `spark-submit`); if you write your programs in Java/Scala you
            # can directly insert your jar file here instead of the script location
            jar="command-runner.jar",
            args=[
                "spark-submit",
                "--deploy-mode", "cluster",
                "--master", "yarn",
                f"s3://{pyspark_example_asset.s3_bucket_name}/{pyspark_example_asset.s3_object_key}",
            ],
        ),
        result_path="DISCARD",
    )
    return sample_spark_step

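# The pyspark/example.py asset is referenced but not shown; a minimal
# placeholder job it might contain (contents are an assumption):
from pyspark.sql import SparkSession

if __name__ == "__main__":
    spark = SparkSession.builder.appName("SparkExample").getOrCreate()
    # Trivial sanity-check workload so the step has something to do.
    df = spark.createDataFrame([(1, "a"), (2, "b")], ["id", "value"])
    print(f"row count: {df.count()}")
    spark.stop()
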
def test_emr_add_step_task():
    default_task_json = {
        'End': True,
        'Parameters': {
            'ClusterId': 'test-cluster-id',
            'Step': {
                'Key1': {
                    'Key2': 'Value2'
                }
            }
        },
        'Resource': {
            'Fn::Join': [
                '',
                [
                    'arn:',
                    {'Ref': 'AWS::Partition'},
                    ':states:::elasticmapreduce:addStep.sync'
                ]
            ]
        },
        'Type': 'Task'
    }

    stack = core.Stack(core.App(), 'test-stack')
    task = sfn.Task(stack, 'test-task',
                    task=emr_tasks.EmrAddStepTask(
                        'test-cluster-id',
                        {'Key1': {'Key2': 'Value2'}}))
    print_and_assert(default_task_json, task)

def __init__(self, scope: core.Construct, id: str, *,
             message: sfn.TaskInput,
             subject: Optional[str] = None,
             topic: Optional[sns.Topic] = None,
             result_path: str = '$.PublishResult',
             output_path: str = '$',
             cause: Optional[str] = None,
             comment: Optional[str] = None,
             error: Optional[str] = None):
    super().__init__(scope, id)

    self._end = sfn.Fail(self, 'Execution Failed',
                         cause=cause, comment=comment, error=error)

    if topic is not None:
        self._start = sfn.Task(
            self, 'Failure Notification',
            input_path='$',
            output_path=output_path,
            result_path=result_path,
            task=sfn_tasks.PublishToTopic(topic, message=message, subject=subject))
        self._start.next(self._end)
    else:
        self._start = self._end

def build(scope: core.Construct, id: str, *,
          cluster_configuration_path: str = '$.ClusterConfiguration.Cluster',
          output_path: str = '$',
          result_path: str = '$.ClusterConfiguration.Cluster') -> sfn.Task:
    # We use a nested Construct to avoid collisions with Lambda and Task ids
    construct = core.Construct(scope, id)

    update_cluster_tags_lambda = emr_lambdas.UpdateClusterTagsBuilder.get_or_build(construct)

    return sfn.Task(
        construct, 'Update Cluster Tags',
        output_path=output_path,
        result_path=result_path,
        task=sfn_tasks.InvokeFunction(
            update_cluster_tags_lambda,
            payload={
                'ExecutionInput': sfn.TaskInput.from_context_at('$$.Execution.Input').value,
                'ClusterConfiguration': sfn.TaskInput.from_data_at(cluster_configuration_path).value
            }))

def build(scope: core.Construct, id: str, *,
          default_fail_if_cluster_running: bool,
          cluster_configuration_path: str = '$.ClusterConfiguration.Cluster',
          output_path: str = '$',
          result_path: str = '$.ClusterConfiguration.Cluster') -> sfn.Task:
    # We use a nested Construct to avoid collisions with Lambda and Task ids
    construct = core.Construct(scope, id)

    fail_if_cluster_running_lambda = emr_lambdas.FailIfClusterRunningBuilder.get_or_build(construct)

    return sfn.Task(
        construct, 'Fail If Cluster Running',
        output_path=output_path,
        result_path=result_path,
        task=sfn_tasks.InvokeFunction(
            fail_if_cluster_running_lambda,
            payload={
                'ExecutionInput': sfn.TaskInput.from_context_at('$$.Execution.Input').value,
                'DefaultFailIfClusterRunning': default_fail_if_cluster_running,
                'ClusterConfiguration': sfn.TaskInput.from_data_at(cluster_configuration_path).value
            }))

def test_start_execution_task():
    default_task_json = {
        'End': True,
        'Parameters': {
            'StateMachineArn': {'Ref': 'teststatemachine7F4C511D'},
            'Input.$': '$$.Execution.Input'
        },
        'Type': 'Task',
        'Resource': {
            'Fn::Join': [
                '',
                [
                    'arn:',
                    {'Ref': 'AWS::Partition'},
                    ':states:::states:startExecution.sync'
                ]
            ]
        }
    }

    stack = core.Stack(core.App(), 'test-stack')
    state_machine = sfn.StateMachine(
        stack, 'test-state-machine',
        definition=sfn.Chain.start(sfn.Succeed(stack, 'Succeeded')))
    task = sfn.Task(stack, 'test-task',
                    task=emr_tasks.StartExecutionTask(state_machine))
    print_and_assert(default_task_json, task)

def __init__(self, scope: core.Construct, id: str, *,
             message: sfn.TaskInput,
             subject: Optional[str] = None,
             topic: Optional[sns.Topic] = None,
             result_path: str = '$.PublishResult',
             output_path: Optional[str] = None):
    super().__init__(scope, id)

    self._end = sfn.Succeed(self, 'Succeeded', output_path=output_path)

    if topic is not None:
        self._start = sfn.Task(
            self, 'Success Notification',
            input_path='$',
            output_path='$',
            result_path=result_path,
            task=sfn_tasks.PublishToTopic(topic, message=message, subject=subject))
        self._start.next(self._end)
    else:
        self._start = self._end

def build(scope: core.Construct, id: str, *,
          override_cluster_configs_lambda: Optional[aws_lambda.Function] = None,
          allowed_cluster_config_overrides: Optional[Dict[str, str]] = None,
          cluster_configuration_path: str = '$.ClusterConfiguration.Cluster',
          output_path: str = '$',
          result_path: str = '$.ClusterConfiguration.Cluster') -> sfn.Task:
    # We use a nested Construct to avoid collisions with Lambda and Task ids
    construct = core.Construct(scope, id)

    override_cluster_configs_lambda = \
        emr_lambdas.OverrideClusterConfigsBuilder.get_or_build(construct) \
        if override_cluster_configs_lambda is None \
        else override_cluster_configs_lambda

    return sfn.Task(
        construct, 'Override Cluster Configs',
        output_path=output_path,
        result_path=result_path,
        task=sfn_tasks.InvokeFunction(
            override_cluster_configs_lambda,
            payload={
                'ExecutionInput': sfn.TaskInput.from_context_at('$$.Execution.Input').value,
                'ClusterConfiguration': sfn.TaskInput.from_data_at(cluster_configuration_path).value,
                'AllowedClusterConfigOverrides': allowed_cluster_config_overrides
            }))

def create_state_machine(self, lambda_functions, page_sqs):
    task_wrapup = aws_stepfunctions.Task(
        self, "task_wrapup",
        task=aws_stepfunctions_tasks.RunLambdaTask(lambda_functions["wrapup"])
    )

    task_analyze_with_scale = aws_stepfunctions.Task(
        self, "AnalyzeWithScale",
        task=aws_stepfunctions_tasks.SendToQueue(
            queue=page_sqs,
            message_body=aws_stepfunctions.TaskInput.from_object({
                "token": aws_stepfunctions.Context.task_token,
                "id.$": "$.id",
                "bucket.$": "$.bucket",
                "original_upload_pdf.$": "$.original_upload_pdf",
                "SAGEMAKER_WORKFLOW_AUGMENTED_AI_ARN.$": "$.SAGEMAKER_WORKFLOW_AUGMENTED_AI_ARN",
                "key.$": "$.key"
            }),
            delay=None,
            integration_pattern=aws_stepfunctions.ServiceIntegrationPattern.WAIT_FOR_TASK_TOKEN
        )
    )

    process_map = aws_stepfunctions.Map(
        self, "Process_Map",
        items_path="$.image_keys",
        result_path="DISCARD",
        parameters={
            "id.$": "$.id",
            "bucket.$": "$.bucket",
            "original_upload_pdf.$": "$.original_upload_pdf",
            "SAGEMAKER_WORKFLOW_AUGMENTED_AI_ARN.$": "$.SAGEMAKER_WORKFLOW_AUGMENTED_AI_ARN",
            "key.$": "$$.Map.Item.Value"
        }
    ).iterator(task_analyze_with_scale)

    definition = process_map.next(task_wrapup)

    aws_stepfunctions.StateMachine(
        scope=self,
        id="multipagepdfa2i_fancy_stepfunction",
        state_machine_name="multipagepdfa2i_fancy_stepfunction",
        definition=definition
    )

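# Because the queue task uses WAIT_FOR_TASK_TOKEN, whatever consumes page_sqs
# must report back with the token embedded in the message body. A hedged
# sketch of such a consumer Lambda; the queue wiring and the process_page
# helper are assumptions:
import json
import boto3

sfn_client = boto3.client('stepfunctions')

def handler(event, context):
    for record in event['Records']:  # standard SQS event shape
        body = json.loads(record['body'])
        try:
            process_page(body)  # hypothetical page-analysis helper
            sfn_client.send_task_success(taskToken=body['token'],
                                         output=json.dumps({'key': body['key']}))
        except Exception as exc:
            sfn_client.send_task_failure(taskToken=body['token'],
                                         error='PageAnalysisFailed', cause=str(exc))
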
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    # Step Function starts here
    # The first thing we need to do is see if they are asking for pineapple on a pizza
    pineapple_check_lambda = _lambda.Function(self, "pineappleCheckLambdaHandler",
                                              runtime=_lambda.Runtime.NODEJS_12_X,
                                              handler="orderPizza.handler",
                                              code=_lambda.Code.from_asset("lambdas"),
                                              )

    # Step functions are built up of steps; we need to define our first step
    order_pizza = step_fn.Task(self, 'Order Pizza Job',
                               task=step_fn_tasks.InvokeFunction(pineapple_check_lambda),
                               input_path='$.flavour',
                               result_path='$.pineappleAnalysis'
                               )

    # Pizza order failure step defined
    job_failed = step_fn.Fail(self, 'Sorry, We Dont add Pineapple',
                              cause='Failed To Make Pizza',
                              error='They asked for Pineapple')

    # If they didn't ask for pineapple, let's cook the pizza
    cook_pizza = step_fn.Pass(self, 'Lets make your pizza')

    # If they ask for a pizza with pineapple, fail. Otherwise cook the pizza
    definition = step_fn.Chain \
        .start(order_pizza) \
        .next(step_fn.Choice(self, 'With Pineapple?')
              .when(step_fn.Condition.boolean_equals('$.pineappleAnalysis.containsPineapple', True),
                    job_failed)
              .otherwise(cook_pizza))

    state_machine = step_fn.StateMachine(self, 'StateMachine',
                                         definition=definition,
                                         timeout=core.Duration.minutes(5))

    # Dead Letter Queue setup
    dlq = sqs.Queue(self, 'stateMachineLambdaDLQ',
                    visibility_timeout=core.Duration.seconds(300))

    # defines an AWS Lambda resource to connect to our API Gateway
    state_machine_lambda = _lambda.Function(self, "stateMachineLambdaHandler",
                                            runtime=_lambda.Runtime.NODEJS_12_X,
                                            handler="stateMachineLambda.handler",
                                            code=_lambda.Code.from_asset("lambdas"),
                                            environment={
                                                'statemachine_arn': state_machine.state_machine_arn
                                            }
                                            )

    state_machine.grant_start_execution(state_machine_lambda)

    # defines an API Gateway REST API resource backed by our "state_machine_lambda" function
    api_gw.LambdaRestApi(self, 'Endpoint',
                         handler=state_machine_lambda
                         )

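# The machine expects an input carrying a `flavour` object for the check
# Lambda to analyze. A hedged sketch of kicking off an execution directly
# with boto3; the ARN and payload field names are assumptions inferred from
# the input_path above:
import json
import boto3

client = boto3.client('stepfunctions')

response = client.start_execution(
    stateMachineArn='arn:aws:states:us-east-1:123456789012:stateMachine:StateMachine',
    input=json.dumps({'flavour': {'requestedFlavour': 'pepperoni'}}),
)
print(response['executionArn'])
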
def __init__(self, scope: core.Construct, id: str, name: str,
             state_machine: sfn.StateMachine,
             input: Optional[Mapping[str, any]] = None,
             fail_chain: Optional[sfn.IChainable] = None):
    super().__init__(scope, id)

    state_machine_task = sfn.Task(
        self, name,
        task=emr_tasks.StartExecutionTask(
            state_machine=state_machine,
            input=input,
            integration_pattern=sfn.ServiceIntegrationPattern.SYNC))

    parse_json_string = emr_lambdas.ParseJsonStringBuilder.get_or_build(self)

    parse_json_string_task = sfn.Task(
        self, f'{name} - Parse JSON Output',
        result_path='$',
        task=sfn_tasks.InvokeFunction(
            parse_json_string,
            payload={'JsonString': sfn.TaskInput.from_data_at('$.Output').value}))

    if fail_chain:
        state_machine_task.add_catch(fail_chain, errors=['States.ALL'], result_path='$.Error')
        parse_json_string_task.add_catch(fail_chain, errors=['States.ALL'], result_path='$.Error')

    state_machine_task.next(parse_json_string_task)

    self._start = state_machine_task
    self._end = parse_json_string_task

def _emr_terminate_cluster_task(self):
    # Shut down the cluster
    terminate_cluster = sfn.Task(
        self, "TerminateCluster",
        task=sfnt.EmrTerminateCluster(
            cluster_id=sfn.Data.string_at("$.ClusterId"),
            integration_pattern=sfn.ServiceIntegrationPattern.SYNC,
        ),
        result_path="DISCARD",
    )
    return terminate_cluster

def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    # lf1 = Function(self, id="my_stack_lambda", runtime=Runtime.PYTHON_3_7,
    #                handler='handlers/my_lambda_handler', code='',
    #                function_name='my_example_lambda')

    my_table = _dynamodb.Table(self, id='dynamoTable', table_name='testcdktable',
                               partition_key=_dynamodb.Attribute(
                                   name='lastname',
                                   type=_dynamodb.AttributeType.STRING))

    my_s3_bucket = _s3.Bucket(self, id='s3bucket', bucket_name='mynpbsample3bucket')

    my_lambda_function = _lambda.Function(
        self, id='lambdafunction',
        runtime=_lambda.Runtime.PYTHON_3_7,
        handler='hello.handler',
        code=_lambda.Code.asset('lambdacode'))

    process_purchase_function = _lambda.Function(
        self, id='process_purchase',
        runtime=_lambda.Runtime.PYTHON_3_7,
        handler='process_purchase.handler',
        code=_lambda.Code.asset('lambdacode'))

    process_refund_function = _lambda.Function(
        self, id='process_refund',
        runtime=_lambda.Runtime.PYTHON_3_7,
        handler='process_refund.handler',
        code=_lambda.Code.asset('lambdacode'))

    # start_state = sfn.Pass(self, "start_state")

    definition = sfn.Task(
        self, 'Get Process Type',
        task=tasks.InvokeFunction(process_purchase_function))

    sfn.StateMachine(
        self, "MyStateMachine",
        definition=definition,
        timeout=core.Duration.seconds(30),
    )

    my_topic = sns.Topic(self, "MyTopic", display_name="Customer Subscription")

def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    # feeder Lambda function
    feeder = aws_lambda.Function(self, id='_feeder',
                                 code=aws_lambda.Code.asset('./code'),
                                 handler='feeder.handler',
                                 runtime=aws_lambda.Runtime.PYTHON_3_7,
                                 description='Feeder function for the Witness project')

    # saver Lambda function
    saver = aws_lambda.Function(self, id='_saver',
                                code=aws_lambda.Code.asset('./code'),
                                handler='saver.handler',
                                runtime=aws_lambda.Runtime.PYTHON_3_7,
                                description='Saver function for the Witness project')

    # feeder Lambda trigger
    archive.add_event_notification(aws_s3.EventType.OBJECT_CREATED_PUT,
                                   s3n.LambdaDestination(feeder))

    # step function
    feederTask = aws_stepfunctions.Task(self, id='_feederTask',
                                        task=aws_tasks.InvokeFunction(feeder))
    saverTask = aws_stepfunctions.Task(self, id='_saverTask',
                                       task=aws_tasks.InvokeFunction(saver))

    definition = feederTask.next(saverTask)

    orchestrator = aws_stepfunctions.StateMachine(self, id='_orchestrator',
                                                  state_machine_name='witness_orchestrator',
                                                  definition=definition)

def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    logging_lambda = lambda_func.Function(
        scope=self,
        id="logging_lambda",
        function_name="logging_lambda",
        handler="logging-lambda.main",
        runtime=lambda_func.Runtime.PYTHON_3_7,
        code=lambda_func.Code.from_asset("./code"))

    second_lambda = lambda_func.Function(
        scope=self,
        id="second_lambda",
        function_name="second_lambda",
        handler="second-lambda.main",
        runtime=lambda_func.Runtime.PYTHON_3_7,
        code=lambda_func.Code.from_asset("./code"))

    logging_lambda_task = tasks.InvokeFunction(logging_lambda)
    logging_step = stepfunctions.Task(scope=self,
                                      id="invoke_logging_function",
                                      task=logging_lambda_task)

    second_lambda_task = tasks.InvokeFunction(second_lambda)
    second_step = stepfunctions.Task(scope=self,
                                     id="invoke_second_function",
                                     task=second_lambda_task)

    definition = logging_step.next(second_step)

    stepfunctions.StateMachine(
        scope=self,
        id="state_machine",
        state_machine_name="state_machine",
        definition=definition,
    )

def __init__(self, app: core.App, cfn_name: str, stack_env):
    super().__init__(scope=app, id=f"{cfn_name}-{stack_env}")

    # lambda
    lambda_task = lambda_.Function(
        scope=self,
        id=f"{cfn_name}-lambda-task",
        code=lambda_.AssetCode.from_asset("lambda_script"),
        handler="lambda_handler.lambda_task",
        timeout=core.Duration.seconds(10),
        runtime=self.LAMBDA_PYTHON_RUNTIME,
        memory_size=128
    )

    # StepFunction tasks
    sns_source = sfn.Pass(
        scope=self,
        id=f"{cfn_name}-sfn-pass",
        comment="pass example",
        input_path="$",
        result_path="$.source",
        result=sfn.Result.from_string("example"),
        output_path="$"
    )

    arguments_generation = sfn.Task(
        scope=self,
        id=f"{cfn_name}-sfn-lambda-task",
        task=sfn_tasks.RunLambdaTask(
            lambda_function=lambda_task,
            payload=sfn.TaskInput.from_object({
                "time.$": "$.time",
                "source.$": "$.source"
            })),
        input_path="$",
        result_path="$.arguments",
        output_path="$.arguments.Payload"
    )

    # stepfunctions
    definition = sns_source.next(arguments_generation)

    _ = sfn.StateMachine(
        scope=self,
        id=f"{cfn_name}-SFn-{stack_env}",
        definition=definition
    )

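# RunLambdaTask nests the function result under `Payload`, which is why the
# task above selects output_path="$.arguments.Payload". A hedged sketch of
# the handler in lambda_script/lambda_handler.py (the file is not shown, so
# the returned fields are assumptions):
def lambda_task(event, context):
    # `time` and `source` arrive via the payload mapping defined in the task.
    return {
        "time": event["time"],
        "source": event["source"],
        "generated_args": ["--run-at", event["time"]],  # hypothetical output
    }
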
def build(scope: core.Construct, id: str, *,
          name: str,
          cluster_id: str,
          result_path: Optional[str] = None,
          output_path: Optional[str] = None) -> sfn.Task:
    # We use a nested Construct to avoid collisions with Task ids
    construct = core.Construct(scope, id)

    return sfn.Task(
        construct, name,
        output_path=output_path,
        result_path=result_path,
        task=sfn_tasks.EmrTerminateCluster(
            cluster_id=cluster_id,
            integration_pattern=sfn.ServiceIntegrationPattern.SYNC))

def build(scope: core.Construct, id: str, *,
          cluster_name: str,
          cluster_tags: List[core.Tag],
          profile_namespace: str,
          profile_name: str,
          configuration_namespace: str,
          configuration_name: str,
          output_path: str = '$',
          result_path: str = '$.ClusterConfiguration') -> sfn.Task:
    # We use a nested Construct to avoid collisions with Lambda and Task ids
    construct = core.Construct(scope, id)

    load_cluster_configuration_lambda = emr_lambdas.LoadClusterConfigurationBuilder.build(
        construct,
        profile_namespace=profile_namespace,
        profile_name=profile_name,
        configuration_namespace=configuration_namespace,
        configuration_name=configuration_name)

    return sfn.Task(
        construct, 'Load Cluster Configuration',
        output_path=output_path,
        result_path=result_path,
        task=sfn_tasks.InvokeFunction(
            load_cluster_configuration_lambda,
            payload={
                'ClusterName': cluster_name,
                'ClusterTags': [{'Key': t.key, 'Value': t.value} for t in cluster_tags],
                'ProfileNamespace': profile_namespace,
                'ProfileName': profile_name,
                'ConfigurationNamespace': configuration_namespace,
                'ConfigurationName': configuration_name,
            }))

def _emr_create_cluster_task(self, pipeline_name):
    # Let the Step Function create a uniform instance group cluster
    # with 1 master and 4 core nodes
    create_cluster = sfn.Task(
        self, "CreateCluster",
        # this is very similar to the specification menu in the AWS UI we used during the course
        task=sfnt.EmrCreateCluster(
            name=pipeline_name,
            applications=[
                sfnt.EmrCreateCluster.ApplicationConfigProperty(name="spark")
            ],
            # specify the cluster worker/master hardware
            instances=sfnt.EmrCreateCluster.InstancesConfigProperty(
                instance_groups=[
                    sfnt.EmrCreateCluster.InstanceGroupConfigProperty(
                        instance_count=1,
                        instance_role=sfnt.EmrCreateCluster.InstanceRoleType.MASTER,
                        instance_type="m5.xlarge",
                        name="Master",
                    ),
                    sfnt.EmrCreateCluster.InstanceGroupConfigProperty(
                        instance_count=4,
                        instance_role=sfnt.EmrCreateCluster.InstanceRoleType.CORE,
                        instance_type="m5.xlarge",
                        name="Core",
                    ),
                ],
            ),
            cluster_role=self.emr_instance_role,
            service_role=self.emr_service_role,
            release_label="emr-6.0.0",
            log_uri=f"s3://{self.emr_logging_bucket.bucket_name}/{pipeline_name}"),
        # we output the ClusterId on the state machine status
        output_path="$.ClusterId",
        result_path="$.ClusterId",
    )
    return create_cluster

def build(scope: core.Construct, id: str, *,
          emr_step: emr_code.EMRStep,
          cluster_id: str,
          result_path: Optional[str] = None,
          output_path: Optional[str] = None,
          wait_for_step_completion: bool = True) -> sfn.Task:
    # We use a nested Construct to avoid collisions with Task ids
    construct = core.Construct(scope, id)

    resolved_step = emr_step.resolve(construct)

    integration_pattern = sfn.ServiceIntegrationPattern.SYNC if wait_for_step_completion \
        else sfn.ServiceIntegrationPattern.FIRE_AND_FORGET

    return sfn.Task(construct, emr_step.name,
                    output_path=output_path,
                    result_path=result_path,
                    task=EmrAddStepTask(
                        cluster_id=cluster_id,
                        step=resolved_step,
                        integration_pattern=integration_pattern))

def build(scope: core.Construct, id: str, *,
          roles: emr_roles.EMRRoles,
          cluster_configuration_path: str = '$.ClusterConfiguration.Cluster',
          result_path: Optional[str] = None,
          output_path: Optional[str] = None,
          wait_for_cluster_start: bool = True) -> sfn.Task:
    # We use a nested Construct to avoid collisions with Lambda and Task ids
    construct = core.Construct(scope, id)

    integration_pattern = sfn.ServiceIntegrationPattern.SYNC if wait_for_cluster_start \
        else sfn.ServiceIntegrationPattern.FIRE_AND_FORGET

    return sfn.Task(
        construct, 'Start EMR Cluster',
        output_path=output_path,
        result_path=result_path,
        task=EmrCreateClusterTask(
            roles=roles,
            cluster_configuration_path=cluster_configuration_path,
            integration_pattern=integration_pattern))

def _lambda_quality_check_task(self):
    lambda_role = iam.Role(
        self, id="QualityLambdaRole",
        assumed_by=iam.ServicePrincipal("lambda.amazonaws.com"),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name("AmazonS3FullAccess"),
            iam.ManagedPolicy.from_aws_managed_policy_name("AmazonAthenaFullAccess"),
        ],
    )

    root_path = Path(os.path.dirname(os.path.abspath(__file__)))
    lambda_handler = root_path.joinpath('lambdas', 'quality_check').as_posix()
    func = lambda_.Function(
        self, "QualityCheckAthenaLambdaHandler",
        handler="lambda.lambda_handler",
        code=lambda_.AssetCode(lambda_handler),
        environment={"athenaDatabase": f"{self.glue_db_name}"},
        role=lambda_role,
        timeout=core.Duration.seconds(30),
        runtime=lambda_.Runtime.PYTHON_3_7,
    )

    # turn the lambda into a stepfunction task so we can use it in our state machine
    task = sfn.Task(
        self, "QualityCheckAthenaLambda",
        task=sfnt.InvokeFunction(func),
    )
    return task

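# The lambdas/quality_check asset is not shown; a hedged sketch of a handler
# that fires an Athena query against the configured database (the query,
# output location, and return shape are assumptions):
import os
import boto3

athena = boto3.client('athena')

def lambda_handler(event, context):
    response = athena.start_query_execution(
        QueryString='SELECT COUNT(*) FROM example_table',  # hypothetical check
        QueryExecutionContext={'Database': os.environ['athenaDatabase']},
        ResultConfiguration={'OutputLocation': 's3://example-athena-results/'},
    )
    return {'queryExecutionId': response['QueryExecutionId']}
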
def __init__(self, app: core.App, id: str, **kwargs) -> None:
    super().__init__(app, id, **kwargs)

    # Create both lambdas
    with open("lambda-submit.py", encoding="utf8") as fp:
        lambda_submit_code = fp.read()

    lambdaFn1 = lambda_.Function(
        self, "submitsmbatch",
        code=lambda_.InlineCode(lambda_submit_code),
        handler="index.lambda_handler",
        timeout=core.Duration.seconds(300),
        runtime=lambda_.Runtime.PYTHON_3_7,
        environment={
            "transform_job_name": transform_job_name,
            "model_name": model_name,
            "max_concurrent": max_concurrent,
            "max_payload_size": max_payload_size,
            "s3_uri_in": s3_uri_in,
            "s3_uri_out": s3_uri_out,
            "instance_type": instance_type,
            "instance_count": instance_count,
        }
    )

    # Add perms
    lambdaFn1.add_to_role_policy(aws_iam.PolicyStatement(
        actions=['sagemaker:CreateTransformJob'],
        resources=['arn:aws:sagemaker:{}:{}:transform-job/{}*'.format(
            my_region, my_acc_id, transform_job_name)]
    ))

    with open("lambda-check.py", encoding="utf8") as fp:
        lambda_check_code = fp.read()

    lambdaFn2 = lambda_.Function(
        self, "checksmbatch",
        code=lambda_.InlineCode(lambda_check_code),
        handler="index.lambda_handler",
        timeout=core.Duration.seconds(300),
        runtime=lambda_.Runtime.PYTHON_3_7,
        environment={
            "model_name": model_name,  # CHANGE TO YOUR ENDPOINT NAME!!
            "content_type": "text/csv"
        }
    )

    # Add perms
    lambdaFn2.add_to_role_policy(aws_iam.PolicyStatement(
        actions=['sagemaker:DescribeTransformJob'],
        resources=['arn:aws:sagemaker:{}:{}:transform-job/{}*'.format(
            my_region, my_acc_id, transform_job_name)]
    ))

    # Define state machine
    # submit_job_activity = sfn.Activity(self, "SubmitJob")
    # check_job_activity = sfn.Activity(self, "CheckJob")

    submit_job = sfn.Task(
        self, "Submit Job",
        task=sfn_tasks.InvokeFunction(lambdaFn1),
    )
    wait_x = sfn.Wait(
        self, "Wait 1 minute",
        time=sfn.WaitTime.duration(core.Duration.minutes(1)),
    )
    get_status = sfn.Task(
        self, "Get Job Status",
        task=sfn_tasks.InvokeFunction(lambdaFn2),
    )
    is_complete = sfn.Choice(self, "Job Complete?")
    job_failed = sfn.Fail(
        self, "Job Failed",
        cause="AWS Batch Job Failed",
        error="DescribeJob returned FAILED",
    )
    final_status = sfn.Task(
        self, "Get Final Job Status",
        task=sfn_tasks.InvokeFunction(lambdaFn2),
    )

    definition = submit_job\
        .next(wait_x)\
        .next(get_status)\
        .next(is_complete
              .when(sfn.Condition.string_equals("$.status", "Failed"), job_failed)
              .when(sfn.Condition.string_equals("$.status", "Completed"), final_status)
              .otherwise(wait_x))

    sfn.StateMachine(
        self, "SMbatchInference",
        definition=definition,
    )

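# lambda-check.py is read from disk and not shown here; a hedged sketch of a
# checker whose output satisfies the "$.status" Choice above (passing the job
# name along in the event is an assumption):
import boto3

sagemaker = boto3.client('sagemaker')

def lambda_handler(event, context):
    job_name = event['transform_job_name']  # hypothetical: forwarded by the submit step
    description = sagemaker.describe_transform_job(TransformJobName=job_name)
    # TransformJobStatus is one of InProgress | Completed | Failed | Stopping | Stopped
    return {'status': description['TransformJobStatus'],
            'transform_job_name': job_name}
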
def build(scope: core.Construct, id: str, *,
          roles: emr_roles.EMRRoles,
          kerberos_attributes_secret: Optional[secretsmanager.Secret] = None,
          secret_configurations: Optional[Dict[str, secretsmanager.Secret]] = None,
          cluster_configuration_path: str = '$.ClusterConfiguration',
          result_path: Optional[str] = None,
          output_path: Optional[str] = None,
          wait_for_cluster_start: bool = True) -> sfn.Task:
    # We use a nested Construct to avoid collisions with Lambda and Task ids
    construct = core.Construct(scope, id)

    event_rule = core.Stack.of(scope).node.try_find_child('EventRule')
    if event_rule is None:
        event_rule = events.Rule(construct, 'EventRule',
                                 enabled=False,
                                 schedule=events.Schedule.rate(core.Duration.minutes(1)))
        BaseBuilder.tag_construct(event_rule)

    run_job_flow_lambda = emr_lambdas.RunJobFlowBuilder.get_or_build(
        construct, roles, event_rule)
    check_cluster_status_lambda = emr_lambdas.CheckClusterStatusBuilder.get_or_build(
        construct, event_rule)

    if kerberos_attributes_secret:
        run_job_flow_lambda.add_to_role_policy(
            iam.PolicyStatement(
                effect=iam.Effect.ALLOW,
                actions=['secretsmanager:GetSecretValue'],
                resources=[f'{kerberos_attributes_secret.secret_arn}*']))

    if secret_configurations is not None:
        for secret in secret_configurations.values():
            run_job_flow_lambda.add_to_role_policy(
                iam.PolicyStatement(
                    effect=iam.Effect.ALLOW,
                    actions=['secretsmanager:GetSecretValue'],
                    resources=[f'{secret.secret_arn}*']))

    return sfn.Task(
        construct, 'Start EMR Cluster (with Secrets)',
        output_path=output_path,
        result_path=result_path,
        task=sfn_tasks.RunLambdaTask(
            run_job_flow_lambda,
            integration_pattern=sfn.ServiceIntegrationPattern.WAIT_FOR_TASK_TOKEN,
            payload=sfn.TaskInput.from_object({
                'ExecutionInput': sfn.TaskInput.from_context_at('$$.Execution.Input').value,
                'ClusterConfiguration': sfn.TaskInput.from_data_at(cluster_configuration_path).value,
                'TaskToken': sfn.Context.task_token,
                'CheckStatusLambda': check_cluster_status_lambda.function_arn,
                'RuleName': event_rule.rule_name,
                'FireAndForget': not wait_for_cluster_start
            })))

def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    # The start of the image pipeline
    imageBucket = aws_s3.Bucket(self, "imageBucket")

    # Capture API activity with a trail
    imageBucketTrail = aws_cloudtrail.Trail(self, "imageBucketTrail",
                                            is_multi_region_trail=False)

    # Restrict to S3 data-plane events
    imageBucketTrail.add_s3_event_selector(
        include_management_events=False,
        prefixes=[f"{imageBucket.bucket_arn}/"],
        read_write_type=aws_cloudtrail.ReadWriteType.WRITE_ONLY)

    # Filter to just PutObject and CopyObject events
    imageBucketRule = aws_events.Rule(
        self, "imageBucketRule",
        event_pattern={
            "source": ["aws.s3"],
            "detail": {
                "eventSource": ["s3.amazonaws.com"],
                "eventName": ["PutObject", "CopyObject"],
                "requestParameters": {
                    "bucketName": [imageBucket.bucket_name]
                }
            }
        })

    #--------------------#
    # Lambda Layers
    #--------------------#
    opencvLayer = aws_lambda.LayerVersion(
        self, 'opencvLayer',
        code=aws_lambda.AssetCode('layers/opencvLayer'),
        compatible_runtimes=[aws_lambda.Runtime.PYTHON_3_6])

    boto3Layer = aws_lambda.LayerVersion(
        self, 'boto3Layer',
        code=aws_lambda.AssetCode('layers/boto3Layer'),
        compatible_runtimes=[aws_lambda.Runtime.PYTHON_3_6])

    #--------------------#
    # Lambda Functions
    #--------------------#

    # Gather info about an image: name, extension, etc.
    getImageInfoFunc = aws_lambda.Function(
        self, "getImageInfoFunc",
        code=aws_lambda.AssetCode('functions/getImageInfoFunc'),
        handler="lambda.handler",
        runtime=aws_lambda.Runtime.PYTHON_3_6)

    # The home for the website
    webBucket = aws_s3.Bucket(self, "webBucket",
                              website_index_document='index.html')

    # Copy the image to the web bucket
    copyImageFunc = aws_lambda.Function(
        self, "copyImageFunc",
        code=aws_lambda.AssetCode('functions/copyImageFunc'),
        handler="lambda.handler",
        runtime=aws_lambda.Runtime.PYTHON_3_6,
        layers=[boto3Layer],
        environment={
            'OUTPUTBUCKET': webBucket.bucket_name,
            'OUTPUTPREFIX': 'images/'
        })

    # Grant permissions to read from the source and write to the destination
    imageBucket.grant_read(copyImageFunc)
    webBucket.grant_write(copyImageFunc)

    # Create a thumbnail of the image and place it in the web bucket
    createThumbnailFunc = aws_lambda.Function(
        self, "createThumbnailFunc",
        code=aws_lambda.AssetCode('functions/createThumbnailFunc'),
        handler="lambda.handler",
        runtime=aws_lambda.Runtime.PYTHON_3_6,
        layers=[boto3Layer, opencvLayer],
        timeout=core.Duration.seconds(10),
        memory_size=256,
        environment={
            'OUTPUTBUCKET': webBucket.bucket_name,
            'OUTPUTPREFIX': 'images/'
        })

    # Grant permissions to read from the source and write to the destination
    imageBucket.grant_read(createThumbnailFunc)
    webBucket.grant_write(createThumbnailFunc)

    # Store page information
    pageTable = aws_dynamodb.Table(
        self, 'pageTable',
        partition_key={
            'name': 'pageName',
            'type': aws_dynamodb.AttributeType.STRING
        },
        billing_mode=aws_dynamodb.BillingMode.PAY_PER_REQUEST,
        stream=aws_dynamodb.StreamViewType.NEW_IMAGE)

    # Save page and image information
    updatePageInfoFunc = aws_lambda.Function(
        self, "updatePageInfoFunc",
        code=aws_lambda.AssetCode('functions/updatePageInfoFunc'),
        handler="lambda.handler",
        runtime=aws_lambda.Runtime.PYTHON_3_6,
        layers=[boto3Layer],
        environment={
            'PAGETABLE': pageTable.table_name,
            'PAGEPREFIX': 'posts/'
        })

    # Grant permissions to write to the page table
    pageTable.grant_write_data(updatePageInfoFunc)

    imagePipelineDone = aws_stepfunctions.Succeed(self, "Done processing image")

    updatePageInfoJob = aws_stepfunctions.Task(
        self, 'Update page info',
        task=aws_stepfunctions_tasks.InvokeFunction(updatePageInfoFunc))
    updatePageInfoJob.next(imagePipelineDone)

    copyImageJob = aws_stepfunctions.Task(
        self, 'Copy image',
        task=aws_stepfunctions_tasks.InvokeFunction(copyImageFunc))

    createThumbnailJob = aws_stepfunctions.Task(
        self, 'Create thumbnail',
        task=aws_stepfunctions_tasks.InvokeFunction(createThumbnailFunc))

    # These tasks can be done in parallel
    processImage = aws_stepfunctions.Parallel(self, 'Process image',
                                              result_path="$.images")
    processImage.branch(copyImageJob)
    processImage.branch(createThumbnailJob)
    processImage.next(updatePageInfoJob)

    # Results of file extension check
    notPng = aws_stepfunctions.Succeed(self, "Not a PNG")

    # Verify the file extension
    checkForPng = aws_stepfunctions.Choice(self, 'Is a PNG?')
    checkForPng.when(
        aws_stepfunctions.Condition.string_equals('$.extension', 'png'),
        processImage)
    checkForPng.otherwise(notPng)

    # A single image pipeline job for testing
    getImageInfoJob = aws_stepfunctions.Task(
        self, 'Get image info',
        task=aws_stepfunctions_tasks.InvokeFunction(getImageInfoFunc))
    getImageInfoJob.next(checkForPng)

    # Configure the image pipeline and starting state
    imagePipeline = aws_stepfunctions.StateMachine(
        self, "imagePipeline",
        definition=getImageInfoJob)

    # Matching events start the image pipeline
    imageBucketRule.add_target(
        aws_events_targets.SfnStateMachine(
            imagePipeline,
            input=aws_events.RuleTargetInput.from_event_path(
                "$.detail.requestParameters")))

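# functions/getImageInfoFunc is an asset directory not shown here; a hedged
# sketch of a handler producing the `$.extension` field the "Is a PNG?"
# choice consumes. The event shape follows the CloudTrail requestParameters
# forwarded by the rule, but the exact field names are assumptions:
import os

def handler(event, context):
    key = event['key']  # e.g. "photos/cat.png"
    name, ext = os.path.splitext(os.path.basename(key))
    return {
        'bucket': event['bucketName'],
        'key': key,
        'name': name,
        'extension': ext.lstrip('.').lower(),  # "png" drives the Choice state
    }
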
def test_emr_create_cluster_task():
    default_task_json = {
        'End': True,
        'Parameters': {
            'AdditionalInfo.$': '$.ClusterConfiguration.Cluster.AdditionalInfo',
            'AmiVersion.$': '$.ClusterConfiguration.Cluster.AmiVersion',
            'Applications.$': '$.ClusterConfiguration.Cluster.Applications',
            'AutoScalingRole.$': '$.ClusterConfiguration.Cluster.AutoScalingRole',
            'BootstrapActions.$': '$.ClusterConfiguration.Cluster.BootstrapActions',
            'Configurations.$': '$.ClusterConfiguration.Cluster.Configurations',
            'CustomAmiId.$': '$.ClusterConfiguration.Cluster.CustomAmiId',
            'EbsRootVolumeSize.$': '$.ClusterConfiguration.Cluster.EbsRootVolumeSize',
            'Instances': {
                'AdditionalMasterSecurityGroups.$': '$.ClusterConfiguration.Cluster.Instances.AdditionalMasterSecurityGroups',
                'AdditionalSlaveSecurityGroups.$': '$.ClusterConfiguration.Cluster.Instances.AdditionalSlaveSecurityGroups',
                'Ec2KeyName.$': '$.ClusterConfiguration.Cluster.Instances.Ec2KeyName',
                'Ec2SubnetId.$': '$.ClusterConfiguration.Cluster.Instances.Ec2SubnetId',
                'Ec2SubnetIds.$': '$.ClusterConfiguration.Cluster.Instances.Ec2SubnetIds',
                'EmrManagedMasterSecurityGroup.$': '$.ClusterConfiguration.Cluster.Instances.EmrManagedMasterSecurityGroup',
                'EmrManagedSlaveSecurityGroup.$': '$.ClusterConfiguration.Cluster.Instances.EmrManagedSlaveSecurityGroup',
                'HadoopVersion.$': '$.ClusterConfiguration.Cluster.Instances.HadoopVersion',
                'InstanceCount.$': '$.ClusterConfiguration.Cluster.Instances.InstanceCount',
                'InstanceFleets.$': '$.ClusterConfiguration.Cluster.Instances.InstanceFleets',
                'InstanceGroups.$': '$.ClusterConfiguration.Cluster.Instances.InstanceGroups',
                'KeepJobFlowAliveWhenNoSteps': True,
                'MasterInstanceType.$': '$.ClusterConfiguration.Cluster.Instances.MasterInstanceType',
                'Placement.$': '$.ClusterConfiguration.Cluster.Instances.Placement',
                'ServiceAccessSecurityGroup.$': '$.ClusterConfiguration.Cluster.Instances.ServiceAccessSecurityGroup',
                'SlaveInstanceType.$': '$.ClusterConfiguration.Cluster.Instances.SlaveInstanceType',
                'TerminationProtected.$': '$.ClusterConfiguration.Cluster.Instances.TerminationProtected'
            },
            'JobFlowRole.$': '$.ClusterConfiguration.Cluster.JobFlowRole',
            'KerberosAttributes.$': '$.ClusterConfiguration.Cluster.KerberosAttributes',
            'LogUri.$': '$.ClusterConfiguration.Cluster.LogUri',
            'ManagedScalingPolicy.$': '$.ClusterConfiguration.Cluster.ManagedScalingPolicy',
            'Name.$': '$.ClusterConfiguration.Cluster.Name',
            'NewSupportedProducts.$': '$.ClusterConfiguration.Cluster.NewSupportedProducts',
            'ReleaseLabel.$': '$.ClusterConfiguration.Cluster.ReleaseLabel',
            'RepoUpgradeOnBoot.$': '$.ClusterConfiguration.Cluster.RepoUpgradeOnBoot',
            'ScaleDownBehavior.$': '$.ClusterConfiguration.Cluster.ScaleDownBehavior',
            'SecurityConfiguration.$': '$.ClusterConfiguration.Cluster.SecurityConfiguration',
            'ServiceRole.$': '$.ClusterConfiguration.Cluster.ServiceRole',
            'StepConcurrencyLevel.$': '$.ClusterConfiguration.Cluster.StepConcurrencyLevel',
            'SupportedProducts.$': '$.ClusterConfiguration.Cluster.SupportedProducts',
            'Tags.$': '$.ClusterConfiguration.Cluster.Tags',
            'VisibleToAllUsers.$': '$.ClusterConfiguration.Cluster.VisibleToAllUsers'
        },
        'Resource': {
            'Fn::Join': [
                '',
                [
                    'arn:',
                    {'Ref': 'AWS::Partition'},
                    ':states:::elasticmapreduce:createCluster.sync'
                ]
            ]
        },
        'Type': 'Task'
    }

    stack = core.Stack(core.App(), 'test-stack')
    task = sfn.Task(
        stack, 'test-task',
        task=emr_tasks.EmrCreateClusterTask(
            roles=emr_profile.EMRRoles(stack, 'test-emr-roles',
                                       role_name_prefix='test-roles'),
            cluster_configuration_path='$.ClusterConfiguration.Cluster',
        ))
    print_and_assert(default_task_json, task)

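# print_and_assert is a shared test helper whose definition does not appear
# in this section. A plausible sketch, assuming the CDK Python bindings'
# State.to_state_json() and Stack.resolve() (this is not the repo's actual
# helper, just one way it could work):
def print_and_assert(expected_task_json, task):
    # Resolve CloudFormation tokens (e.g. the Fn::Join'd Resource ARN) so the
    # rendered state can be compared against the expected literal dict.
    stack = core.Stack.of(task)
    rendered = stack.resolve(task.to_state_json())
    print(rendered)
    assert rendered == expected_task_json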