def test_fail_chain():
    default_fragment_json = {
        'Type': 'Parallel',
        'End': True,
        'Branches': [{
            'StartAt': 'test-fragment: Failure Notification',
            'States': {
                'test-fragment: Failure Notification': {
                    'Next': 'test-fragment: Execution Failed',
                    'InputPath': '$',
                    'Parameters': {
                        'TopicArn': {'Ref': 'testtopicB3D54793'},
                        'Message': 'TestMessage',
                        'Subject': 'TestSubject'
                    },
                    'OutputPath': '$',
                    'Type': 'Task',
                    'Resource': {
                        'Fn::Join': ['', [
                            'arn:',
                            {'Ref': 'AWS::Partition'},
                            ':states:::sns:publish'
                        ]]
                    },
                    'ResultPath': '$.PublishResult'
                },
                'test-fragment: Execution Failed': {
                    'Type': 'Fail',
                    'Comment': 'TestComment',
                    'Error': 'TestError',
                    'Cause': 'TestCause'
                }
            }
        }]
    }

    stack = core.Stack(core.App(), 'test-stack')

    fragment = emr_chains.Fail(
        stack, 'test-fragment',
        message=sfn.TaskInput.from_text('TestMessage'),
        subject='TestSubject',
        topic=sns.Topic(stack, 'test-topic'),
        cause='TestCause',
        comment='TestComment',
        error='TestError')

    print_and_assert(default_fragment_json, fragment)
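# `print_and_assert` is referenced above but not shown in this excerpt. A
# plausible sketch of the helper (an assumption, not the repo's actual
# implementation): render the fragment as a single Parallel state and compare
# its resolved state JSON against the expected document.
def print_and_assert(fragment_json: dict, fragment: sfn.StateMachineFragment):
    stack = core.Stack.of(fragment)
    # to_single_state() wraps the fragment in a Parallel state, matching the
    # 'Type': 'Parallel' envelope asserted above
    resolved_fragment = stack.resolve(fragment.to_single_state().to_state_json())
    print(resolved_fragment)
    assert fragment_json == resolved_fragment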
# Use the Artifacts bucket from the EMR Profile if one was configured,
# otherwise look the bucket up by its conventional name
# (assignment head assumed: the excerpt begins mid-statement)
deployment_bucket = s3.Bucket.from_bucket_name(
    stack, 'ArtifactsBucket', f'{NAMING_PREFIX}-artifacts') \
    if launch_function.emr_profile.artifacts_bucket is None \
    else launch_function.emr_profile.artifacts_bucket

# Prepare the scripts executed by our Steps for deployment
# This uses the Artifacts bucket defined in Cluster Configuration used by our
# Launch Function
step_code = emr_code.Code.from_path(
    path='./step_sources',
    deployment_bucket=deployment_bucket,
    deployment_prefix='sns_triggered_pipeline/step_sources')

# Create a Chain to receive Failure messages
fail = emr_chains.Fail(
    stack, 'FailChain',
    message=sfn.TaskInput.from_data_at('$.Error'),
    subject='Pipeline Failure',
    topic=failure_topic)

# Use the State Machine defined earlier to launch the Cluster
# The ClusterConfigurationOverrides and Tags will be passed through for
# runtime overrides
launch_cluster = emr_chains.NestedStateMachine(
    stack, 'NestedStateMachine',
    name='Launch SNS Pipeline Cluster StateMachine',
    state_machine=launch_function.state_machine,
    input={
        'ClusterConfigurationOverrides':
            sfn.TaskInput.from_data_at('$.ClusterConfigurationOverrides').value,
        'Tags': sfn.TaskInput.from_data_at('$.Tags').value  # assumed to mirror the key above
    })
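# A minimal sketch of invoking the assembled pipeline with runtime overrides
# (an assumption, not part of the example above: requires boto3 and a deployed
# state machine; the ARN and override values are illustrative, while the input
# keys mirror the JsonPaths referenced above).
import json

import boto3


def start_pipeline_execution() -> None:
    sfn_client = boto3.client('stepfunctions')
    sfn_client.start_execution(
        stateMachineArn='arn:aws:states:us-east-1:123456789012:stateMachine:sns-pipeline',  # hypothetical
        input=json.dumps({
            'ClusterConfigurationOverrides': {'ClusterName': 'sns-pipeline-cluster'},  # illustrative
            'Tags': [{'Key': 'pipeline', 'Value': 'sns-triggered'}]  # illustrative
        }))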
def __init__(
        self, scope: core.Construct, id: str, *,
        launch_function_name: str,
        emr_profile: emr_profile.EMRProfile,
        cluster_configuration: cluster_configuration.ClusterConfiguration,
        cluster_name: Optional[str] = None,
        namespace: str = 'default',
        default_fail_if_cluster_running: bool = False,
        success_topic: Optional[sns.Topic] = None,
        failure_topic: Optional[sns.Topic] = None,
        override_cluster_configs_lambda: Optional[aws_lambda.Function] = None,
        allowed_cluster_config_overrides: Optional[Dict[str, Dict[str, str]]] = None,
        description: Optional[str] = None,
        cluster_tags: Union[List[core.Tag], Dict[str, str], None] = None,
        wait_for_cluster_start: bool = True) -> None:
    super().__init__(scope, id)

    if launch_function_name is None:
        return

    self._launch_function_name = launch_function_name
    self._namespace = namespace
    self._emr_profile = emr_profile
    self._cluster_configuration = cluster_configuration
    self._cluster_name = cluster_name
    self._default_fail_if_cluster_running = default_fail_if_cluster_running
    self._success_topic = success_topic
    self._failure_topic = failure_topic
    self._override_cluster_configs_lambda = override_cluster_configs_lambda
    self._description = description
    self._wait_for_cluster_start = wait_for_cluster_start

    if allowed_cluster_config_overrides is None:
        self._allowed_cluster_config_overrides = \
            cluster_configuration.override_interfaces.get('default', None)
    else:
        self._allowed_cluster_config_overrides = allowed_cluster_config_overrides

    # Normalize the cluster tags to a List[core.Tag] and append the
    # deployment product/version tags
    if isinstance(cluster_tags, dict):
        self._cluster_tags = [core.Tag(k, v) for k, v in cluster_tags.items()]
    elif isinstance(cluster_tags, list):
        self._cluster_tags = cluster_tags
    else:
        self._cluster_tags = []

    self._cluster_tags.extend([
        core.Tag('deployment:product:name', __product__),
        core.Tag('deployment:product:version', __version__)
    ])

    # Grant the Instance Role read access to any configuration artifacts,
    # which is only possible if the role is mutable
    if len(cluster_configuration.configuration_artifacts) > 0:
        if emr_profile.mutable_instance_role:
            for i, configuration_artifact in enumerate(
                    cluster_configuration.configuration_artifacts):
                bucket_name = configuration_artifact['Bucket']
                path = configuration_artifact['Path']
                bucket = s3.Bucket.from_bucket_name(self, f'Bucket_{i}', bucket_name)
                bucket.grant_read(emr_profile.roles.instance_role, path)
        else:
            logger.warning('--------------------------------------------------------------------------')
            logger.warning('Unable to authorize the artifacts in the ClusterConfiguration')
            logger.warning(f'The EMRProfile {emr_profile.profile_name} has an immutable Instance Role')
            logger.warning('Use of these artifacts will require direct authorization on the EMRProfile')
            logger.warning('--------------------------------------------------------------------------')

    # Create a Chain to receive Failure messages
    fail = emr_chains.Fail(
        self, 'FailChain',
        message=sfn.TaskInput.from_data_at('$.Error'),
        subject='EMR Launch Function Failure',
        topic=failure_topic,
        error='Failed to Launch Cluster',
        cause='See Execution Event "FailStateEntered" for complete error cause')

    # Create Task for loading the cluster configuration from Parameter Store
    load_cluster_configuration = emr_tasks.LoadClusterConfigurationBuilder.build(
        self, 'LoadClusterConfigurationTask',
        cluster_name=cluster_name,
        cluster_tags=self._cluster_tags,
        profile_namespace=emr_profile.namespace,
        profile_name=emr_profile.profile_name,
        configuration_namespace=cluster_configuration.namespace,
        configuration_name=cluster_configuration.configuration_name,
        result_path='$.ClusterConfiguration',
    )
    load_cluster_configuration.add_catch(fail, errors=['States.ALL'], result_path='$.Error')
    # Create Task for overriding cluster configurations
    override_cluster_configs = emr_tasks.OverrideClusterConfigsBuilder.build(
        self, 'OverrideClusterConfigsTask',
        override_cluster_configs_lambda=override_cluster_configs_lambda,
        allowed_cluster_config_overrides=self._allowed_cluster_config_overrides,
        input_path='$.ClusterConfiguration.Cluster',
        result_path='$.ClusterConfiguration.Cluster',
    )
    # Attach an error catch to the Task
    override_cluster_configs.add_catch(fail, errors=['States.ALL'], result_path='$.Error')

    # Create Task to conditionally fail if a cluster with this name is already
    # running, based on user input
    fail_if_cluster_running = emr_tasks.FailIfClusterRunningBuilder.build(
        self, 'FailIfClusterRunningTask',
        default_fail_if_cluster_running=default_fail_if_cluster_running,
        input_path='$.ClusterConfiguration.Cluster',
        result_path='$.ClusterConfiguration.Cluster',
    )
    # Attach an error catch to the Task
    fail_if_cluster_running.add_catch(fail, errors=['States.ALL'], result_path='$.Error')

    # Create a Task for updating the cluster tags at runtime
    update_cluster_tags = emr_tasks.UpdateClusterTagsBuilder.build(
        self, 'UpdateClusterTagsTask',
        input_path='$.ClusterConfiguration.Cluster',
        result_path='$.ClusterConfiguration.Cluster',
    )
    # Attach an error catch to the Task
    update_cluster_tags.add_catch(fail, errors=['States.ALL'], result_path='$.Error')

    # Create a Task to create the cluster
    if cluster_configuration.secret_configurations is None \
            and emr_profile.kerberos_attributes_secret is None:
        # Use the standard Step Functions/EMR integration to create the cluster
        create_cluster = emr_tasks.CreateClusterBuilder.build(
            self, 'CreateClusterTask',
            roles=emr_profile.roles,
            input_path='$.ClusterConfiguration.Cluster',
            result_path='$.LaunchClusterResult',
            wait_for_cluster_start=wait_for_cluster_start,
        )
    else:
        # Use the RunJobFlow Lambda to create the cluster to avoid exposing the
        # SecretConfigurations and KerberosAttributes values
        create_cluster = emr_tasks.RunJobFlowBuilder.build(
            self, 'CreateClusterTask',
            roles=emr_profile.roles,
            kerberos_attributes_secret=emr_profile.kerberos_attributes_secret,
            secret_configurations=cluster_configuration.secret_configurations,
            input_path='$.ClusterConfiguration',
            result_path='$.LaunchClusterResult',
            wait_for_cluster_start=wait_for_cluster_start,
        )
    # Attach an error catch to the Task
    create_cluster.add_catch(fail, errors=['States.ALL'], result_path='$.Error')

    # A Chain for Success notification when the launch completes
    success = emr_chains.Success(
        self, 'SuccessChain',
        message=sfn.TaskInput.from_data_at('$.LaunchClusterResult'),
        subject='Launch EMR Config Succeeded',
        topic=success_topic,
        output_path='$')

    # Assemble the Launch Function's State Machine
    definition = sfn.Chain \
        .start(load_cluster_configuration) \
        .next(override_cluster_configs) \
        .next(fail_if_cluster_running) \
        .next(update_cluster_tags) \
        .next(create_cluster) \
        .next(success)

    self._state_machine = sfn.StateMachine(
        self, 'StateMachine',
        state_machine_name=f'{namespace}_{launch_function_name}',
        definition=definition)

    # Store the launch function's metadata in Parameter Store so it can be
    # discovered and reconstructed later
    self._ssm_parameter = ssm.CfnParameter(
        self, 'SSMParameter',
        type='String',
        value=json.dumps(self.to_json()),
        tier='Intelligent-Tiering',
        name=f'{SSM_PARAMETER_PREFIX}/{namespace}/{launch_function_name}')
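# A minimal sketch of discovering a deployed launch function at runtime via
# the SSM parameter written above (an assumption, not part of the construct:
# requires boto3, and the concrete SSM_PARAMETER_PREFIX, namespace, and
# function name below are hypothetical).
import json

import boto3


def load_launch_function_metadata() -> dict:
    ssm_client = boto3.client('ssm')
    param = ssm_client.get_parameter(
        Name='/emr_launch/launch_functions/default/my-launch-function')  # hypothetical name
    # The parameter value is the JSON produced by to_json() above
    return json.loads(param['Parameter']['Value'])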
def __init__(self, scope: core.Construct, id: str, emr_launch_stack,
             artifact_bucket, output_bucket, **kwargs):
    super().__init__(scope, id, **kwargs)

    launch_function = emr_launch_stack.launch_function

    # Create a DynamoDB table for tracking batch metadata
    dynamo_table = dynamo.Table(
        self, "dynamotable",
        partition_key=dynamo.Attribute(name="BatchId",
                                       type=dynamo.AttributeType.STRING),
        sort_key=dynamo.Attribute(name="Name",
                                  type=dynamo.AttributeType.STRING),
        billing_mode=dynamo.BillingMode.PAY_PER_REQUEST)

    # Grant the EMR Instance Role access to the table and to basic
    # CloudWatch Logs/EC2 describe actions
    emr_role = aws_iam.Role.from_role_arn(
        self, "emr_role_iam", role_arn=emr_launch_stack.instance_role_arn)
    emr_role.add_to_policy(
        aws_iam.PolicyStatement(actions=["dynamodb:*"],
                                resources=[dynamo_table.table_arn]))
    emr_role.add_to_policy(
        aws_iam.PolicyStatement(actions=[
            "logs:CreateLogStream", "logs:DescribeLogStreams",
            "logs:CreateLogGroup", "logs:PutLogEvents", "ec2:DescribeTags"
        ], resources=["*"]))

    # SNS Topics for Success/Failure messages from our Pipeline
    success_topic = sns.Topic(self, 'SuccessTopic')
    failure_topic = sns.Topic(self, 'FailureTopic')

    # Upload artifacts to S3
    step_code = s3d.BucketDeployment(
        self, id='sparkscript',
        destination_bucket=artifact_bucket,
        destination_key_prefix='steps',
        sources=[s3d.Source.asset('infrastructure/emr_orchestration/steps/')])

    # Create a Chain to receive Failure messages
    fail = emr_chains.Fail(
        self, 'FailChain',
        message=sfn.TaskInput.from_data_at('$.Error'),
        subject='Pipeline Failure',
        topic=failure_topic)

    # Define a Task to Terminate the Cluster on failure
    terminate_failed_cluster = emr_tasks.TerminateClusterBuilder.build(
        self, 'TerminateFailedCluster',
        name='Terminate Failed Cluster',
        cluster_id=sfn.TaskInput.from_data_at('$.LaunchClusterResult.ClusterId').value,
        result_path='$.TerminateResult').add_catch(
            fail, errors=['States.ALL'], result_path='$.Error')
    terminate_failed_cluster.next(fail)

    # Use a NestedStateMachine to launch the cluster
    launch_cluster = emr_chains.NestedStateMachine(
        self, 'NestedStateMachine',
        name='Launch Cluster StateMachine',
        state_machine=launch_function.state_machine,
        fail_chain=fail)

    # Add the PySpark Step, terminating the cluster if the Step fails
    pyspark_step = emr_chains.AddStepWithArgumentOverrides(
        self, 'PySparkSceneDetection',
        emr_step=emr_code.EMRStep(
            name='Scene Detection - PySpark Job',
            jar='command-runner.jar',
            args=[
                'spark-submit',
                '--master', 'yarn',
                '--deploy-mode', 'cluster',
                '--packages', 'com.audienceproject:spark-dynamodb_2.12:1.1.2',
                os.path.join(f's3://{artifact_bucket.bucket_name}',
                             'steps', 'scene_detection.py'),
                '--batch-id', 'DynamoDB.BatchId',
                '--batch-metadata-table-name', dynamo_table.table_name,
                '--output-bucket', output_bucket.bucket_name,
                '--synchronized-table-name', 'synchronized-signals'
            ]),
        cluster_id=sfn.TaskInput.from_data_at('$.LaunchClusterResult.ClusterId').value,
        result_path='$.PySparkResult',
        fail_chain=terminate_failed_cluster)

    # Define a Task to Terminate the Cluster
    terminate_cluster = emr_tasks.TerminateClusterBuilder.build(
        self, 'TerminateCluster',
        name='Terminate Cluster',
        cluster_id=sfn.TaskInput.from_data_at('$.LaunchClusterResult.ClusterId').value,
        result_path='$.TerminateResult').add_catch(
            fail, errors=['States.ALL'], result_path='$.Error')

    # A Chain for Success notification when the pipeline completes
    success = emr_chains.Success(
        self, 'SuccessChain',
        message=sfn.TaskInput.from_data_at('$.TerminateResult'),
        subject='Pipeline Succeeded',
        topic=success_topic)

    # Assemble the Pipeline
    definition = sfn.Chain \
        .start(launch_cluster) \
        .next(pyspark_step) \
        .next(terminate_cluster) \
        .next(success)

    # Create the State Machine
    self.state_machine = sfn.StateMachine(
        self, 'SceneDetectionStateMachine',
        state_machine_name='scene-detection-pipeline',
        definition=definition)

    self.dynamo_table = dynamo_table
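# A minimal sketch of seeding the tracking table read by the PySpark step
# above (an assumption, not part of the stack: requires boto3; the table name
# and any attributes beyond the key schema are illustrative).
import boto3


def seed_batch_record() -> None:
    dynamodb = boto3.resource('dynamodb')
    table = dynamodb.Table('scene-detection-tracking')  # hypothetical table name
    table.put_item(Item={
        'BatchId': 'batch-2024-06-01',  # partition key defined on dynamo_table
        'Name': 'drive-0001',           # sort key defined on dynamo_table
        'Status': 'PENDING'             # illustrative attribute
    })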