def test_append_states_after_terminal_state_will_fail(): with pytest.raises(ValueError): chain = Chain() chain.append(Pass('Pass')) chain.append(Fail('Fail')) chain.append(Pass('Pass2')) with pytest.raises(ValueError): chain = Chain() chain.append(Pass('Pass')) chain.append(Succeed('Succeed')) chain.append(Pass('Pass2'))
def test_chaining_steps(): s1 = Pass('Step - One') s2 = Pass('Step - Two') s3 = Pass('Step - Three') Chain([s1, s2]) assert s1.next_step == s2 assert s2.next_step is None chain1 = Chain([s2, s3]) assert s2.next_step == s3 chain2 = Chain([s1, s3]) assert s1.next_step == s3 assert s2.next_step == s1.next_step with pytest.raises(DuplicateStatesInChain): chain2.append(s3) with pytest.raises(DuplicateStatesInChain): chain3 = Chain([chain1, chain2]) s1.next(s2) chain3 = Chain([s3, s1]) assert chain3.steps == [s3, s1] assert s3.next_step == s1 assert s1.next_step == s2 assert s2.next_step == s3 Chain([Chain([s3]), Chain([s1])]) with pytest.raises(DuplicateStatesInChain): Chain([Chain([s1, s2, s1]), s3]) Chain([s1, s2, s1, s3]) Chain([Chain([s1, s2]), s3]) assert s1.next_step == s2 assert s2.next_step == s3
def test_choice_example(): next_state = Task( 'NextState', resource='arn:aws:lambda:us-east-1:1234567890:function:NextState') choice_state = Choice('ChoiceState') choice_state.default_choice( Fail('DefaultState', error='DefaultStateError', cause='No Matches!')) choice_state.add_choice( ChoiceRule.NumericEquals(variable='$.foo', value=1), Chain([ Task('FirstMatchState', resource= 'arn:aws:lambda:us-east-1:1234567890:function:FirstMatchState' ), next_state ])) choice_state.add_choice( ChoiceRule.NumericEquals(variable='$.foo', value=2), Chain([ Task( 'SecondMatchState', resource= 'arn:aws:lambda:us-east-1:1234567890:function:SecondMatchState' ), next_state ])) chain = Chain() chain.append( Task( 'FirstState', resource='arn:aws:lambda:us-east-1:1234567890:function:StartLambda' )) chain.append(choice_state) result = Graph(chain).to_dict() assert result == { 'StartAt': 'FirstState', 'States': { 'FirstState': { 'Type': 'Task', 'Resource': 'arn:aws:lambda:us-east-1:1234567890:function:StartLambda', 'Next': 'ChoiceState' }, 'ChoiceState': { 'Type': 'Choice', 'Choices': [{ 'Variable': '$.foo', 'NumericEquals': 1, 'Next': 'FirstMatchState' }, { 'Variable': '$.foo', 'NumericEquals': 2, 'Next': 'SecondMatchState' }], 'Default': 'DefaultState' }, 'FirstMatchState': { 'Type': 'Task', 'Resource': 'arn:aws:lambda:us-east-1:1234567890:function:FirstMatchState', 'Next': 'NextState' }, 'SecondMatchState': { 'Type': 'Task', 'Resource': 'arn:aws:lambda:us-east-1:1234567890:function:SecondMatchState', 'Next': 'NextState' }, 'DefaultState': { 'Type': 'Fail', 'Error': 'DefaultStateError', 'Cause': 'No Matches!' }, 'NextState': { 'Type': 'Task', 'Resource': 'arn:aws:lambda:us-east-1:1234567890:function:NextState', 'End': True } } }
class StepfunctionsWorkflow(DataJobBase): """Class that defines the methods to create and execute an orchestration using the step functions sdk. example: with StepfunctionsWorkflow("techskills-parser") as tech_skills_parser_orchestration: some-glue-job-1 >> [some-glue-job-2,some-glue-job-3] >> some-glue-job-4 tech_skills_parser_orchestration.execute() """ def __init__( self, datajob_stack: core.Construct, name: str, notification: Union[str, list] = None, role: iam.Role = None, region: str = None, **kwargs, ): super().__init__(datajob_stack, name, **kwargs) self.workflow = None self.chain_of_tasks = None self.role = self.get_role( role=role, datajob_stack=datajob_stack, unique_name=self.unique_name, service_principal="states.amazonaws.com", ) self.region = (region if region is not None else os.environ.get("AWS_DEFAULT_REGION")) self.notification = self._setup_notification(notification) self.kwargs = kwargs # init directed graph dict where values are a set. # we do it like this so that we can use toposort. self.directed_graph = defaultdict(set) def add_task(self, some_task: DataJobBase) -> object: """get the stepfunctions task, sfn_task, we would like to orchestrate.""" return some_task.sfn_task def add_parallel_tasks(self, parallel_tasks: Iterator[DataJobBase]) -> Parallel: """add tasks in parallel (wrapped in a list) to the workflow we would like to orchestrate.""" parallel_pipelines = Parallel(state_id=uuid.uuid4().hex) for a_task in parallel_tasks: logger.debug(f"adding parallel task {a_task}") sfn_task = self.add_task(a_task) parallel_pipelines.add_branch(sfn_task) return parallel_pipelines def _is_one_task(self, directed_graph_toposorted): """If we have length of 2 and the second is an Ellipsis object we have scheduled 1 task. example: some_task >> ... :param directed_graph_toposorted: a toposorted graph, a graph with all the sorted tasks :return: boolean """ return len(directed_graph_toposorted) == 2 and isinstance( list(directed_graph_toposorted[1])[0], type(Ellipsis)) def _construct_toposorted_chain_of_tasks(self) -> Chain: """Take the directed graph and toposort so that we can efficiently organize our workflow, i.e. parallelize where possible. if we have 2 elements where one of both is an Ellipsis object we need to orchestrate just 1 job. In the other case we will loop over the toposorted dag and assign a stepfunctions task or assign multiple tasks in parallel. Returns: toposorted chain of tasks """ self.chain_of_tasks = Chain() directed_graph_toposorted = list(toposort.toposort( self.directed_graph)) if self._is_one_task( directed_graph_toposorted=directed_graph_toposorted): sfn_task = self.add_task(next(iter(directed_graph_toposorted[0]))) self.chain_of_tasks.append(sfn_task) else: for element in directed_graph_toposorted: if len(element) == 1: sfn_task = self.add_task(next(iter(element))) elif len(element) > 1: sfn_task = self.add_parallel_tasks(element) else: raise StepfunctionsWorkflowException( "cannot have an index in the directed graph with 0 elements" ) self.chain_of_tasks.append(sfn_task) return self.chain_of_tasks def build_workflow(self): """create a step functions workflow from the chain_of_tasks.""" self.chain_of_tasks = self._construct_toposorted_chain_of_tasks() logger.debug("creating a chain from all the different steps.") self.chain_of_tasks = self._integrate_notification_in_workflow( chain_of_tasks=self.chain_of_tasks) logger.debug(f"creating a workflow with name {self.unique_name}") sfn_client = boto3.client("stepfunctions") self.workflow = Workflow( name=self.unique_name, definition=self.chain_of_tasks, role=self.role.role_arn, client=sfn_client, **self.kwargs, ) def create(self): """create sfn stack.""" import json cfn_template = json.dumps(self.workflow.definition.to_dict()) CfnStateMachine( scope=self.datajob_stack, id=self.unique_name, state_machine_name=self.unique_name, role_arn=self.role.role_arn, definition_string=cfn_template, **self.kwargs, ) def _setup_notification( self, notification: Union[str, list]) -> Union[SnsTopic, None]: """Create a SnsTopic if the notification parameter is defined. :param notification: email address as string or list of email addresses to be subscribed. :return: """ if notification is not None: name = f"{self.name}-notification" return SnsTopic(self.datajob_stack, name, notification) def _integrate_notification_in_workflow(self, chain_of_tasks: Chain) -> Chain: """If a notification is defined we configure an SNS with email subscription to alert the user if the stepfunctions workflow failed or succeeded. :param chain_of_tasks: the workflow definition that contains all the steps we want to execute. :return: if notification is set, we adapt the workflow to include an SnsPublishStep on failure or on success. If notification is not set, we return the workflow as we received it. """ if self.notification: logger.debug( "A notification is configured, " "implementing a notification on Error or when the stepfunctions workflow succeeds." ) failure_notification = SnsPublishStep( "FailureNotification", parameters={ "TopicArn": self.notification.get_topic_arn(), "Message": f"Stepfunctions workflow {self.unique_name} Failed.", }, ) pass_notification = SnsPublishStep( "SuccessNotification", parameters={ "TopicArn": self.notification.get_topic_arn(), "Message": f"Stepfunctions workflow {self.unique_name} Succeeded.", }, ) catch_error = Catch(error_equals=["States.ALL"], next_step=failure_notification) workflow_with_notification = Parallel(state_id="notification") workflow_with_notification.add_branch(chain_of_tasks) workflow_with_notification.add_catch(catch_error) workflow_with_notification.next(pass_notification) return Chain([workflow_with_notification]) logger.debug( "No notification is configured, returning the workflow definition." ) return chain_of_tasks def __enter__(self): """first steps we have to do when entering the context manager.""" logger.info(f"creating step functions workflow for {self.unique_name}") _set_workflow(self) return self def __exit__(self, exc_type, exc_value, traceback) -> None: """steps we have to do when exiting the context manager.""" self.build_workflow() _set_workflow(None) logger.info(f"step functions workflow {self.unique_name} created")
def test_wait_example(): chain = Chain() chain.append( Task('FirstState', resource='arn:aws:lambda:us-east-1:1234567890:function:StartState' )) chain.append(Wait('wait_using_seconds', seconds=10)) chain.append(Wait('wait_using_timestamp', timestamp='2015-09-04T01:59:00Z')) chain.append( Wait('wait_using_timestamp_path', timestamp_path='$.expirydate')) chain.append( Wait('wait_using_seconds_path', seconds_path='$.expiryseconds')) chain.append( Task( 'FinalState', resource='arn:aws:lambda:us-east-1:1234567890:function:EndLambda')) result = Graph(chain).to_dict() assert result == { 'StartAt': 'FirstState', 'States': { 'FirstState': { 'Type': 'Task', 'Resource': 'arn:aws:lambda:us-east-1:1234567890:function:StartState', 'Next': 'wait_using_seconds' }, 'wait_using_seconds': { 'Type': 'Wait', 'Seconds': 10, 'Next': 'wait_using_timestamp' }, 'wait_using_timestamp': { 'Type': 'Wait', 'Timestamp': '2015-09-04T01:59:00Z', 'Next': 'wait_using_timestamp_path' }, 'wait_using_timestamp_path': { 'Type': 'Wait', 'TimestampPath': '$.expirydate', 'Next': 'wait_using_seconds_path' }, 'wait_using_seconds_path': { 'Type': 'Wait', 'SecondsPath': '$.expiryseconds', 'Next': 'FinalState', }, 'FinalState': { 'Type': 'Task', 'Resource': 'arn:aws:lambda:us-east-1:1234567890:function:EndLambda', 'End': True } } }