Example #1
    def _start_retries(self, retry_data, obj, recovering=False):
        """
        Saves the current payload, with modified retry information, to DynamoDB
        so that a later query can pick up the item and re-execute the payload
        at a future point.

        :param retry_data: a dict like {'system_context': {...}, 'user_context': {...}}
        :param obj: a dict
        """
        retry_system_context = retry_data[PAYLOAD.SYSTEM_CONTEXT]
        serialized = json.dumps(retry_data, **json_dumps_additional_kwargs())

        for primary in [True, False]:
            try:
                # save the retry entity
                # https://www.awsarchitectureblog.com/2015/03/backoff.html
                # "full jitter"
                cap, base, attempt = 60., 1., retry_system_context[
                    SYSTEM_CONTEXT.RETRIES]
                sleep = random.uniform(0, min(cap, base * 2**attempt))
                return start_retries(self,
                                     time.time() + sleep,
                                     serialized,
                                     primary=primary,
                                     recovering=recovering)
            except ClientError:
                # log an error to at least expose the error
                self._queue_error(
                    ERRORS.ERROR,
                    'Unable to save last payload for retry (primary=%s).' %
                    primary,
                    exc_info=True)
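The delay calculation above implements the "full jitter" backoff described in the linked AWS article: instead of sleeping for the full exponential delay, the code sleeps for a uniformly random fraction of it, which spreads retries out and avoids synchronized retry storms. A minimal standalone sketch of the same calculation (the helper name is illustrative; the constants mirror the snippet above):

import random

def full_jitter_delay(attempt, base=1.0, cap=60.0):
    """Randomized backoff delay for a given retry attempt ("full jitter")."""
    # exponential backoff, capped at `cap` seconds
    ceiling = min(cap, base * 2 ** attempt)
    # sleep anywhere between 0 and the ceiling rather than the full amount
    return random.uniform(0, ceiling)

# attempt 0 -> [0, 1.0), attempt 3 -> [0, 8.0), attempt 10+ -> [0, 60.0)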
Example #2
    def _store_checkpoint(self, obj):
        """
        Saves the last response from Context._send_next_event_for_dispatch so that
        a terminated machine can be started back up using the saved information.

        :param obj: a dict.
        """
        # save the last successful dispatch to aws. on kinesis, the sent data looks like
        # {u'ShardId': u'shardId-000000000000', u'SequenceNumber': u'49559000...18786'} and thus
        # has sufficient information to go and seek the record directly from kinesis, and to
        # restart the fsm using the saved state.
        if obj.get(OBJ.SENT):

            for primary in [True, False]:
                try:
                    return store_checkpoint(
                        self,
                        json.dumps(obj[OBJ.SENT],
                                   **json_dumps_additional_kwargs()),
                        primary=primary)
                except ClientError:
                    # if unable to save the last sent message, then recovery/checkpointing
                    # will be missing the more recent executed state. recovering may be
                    # complicated, especially since the last transition has been marked as
                    # successfully dispatched
                    self._queue_error(
                        ERRORS.ERROR,
                        'Unable to save last sent data (primary=%s).' %
                        primary,
                        exc_info=True)
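The checkpoint stored here is just the serialized response from the dispatch call. For Kinesis, that response contains a ShardId/SequenceNumber pair, which is enough to seek back to the exact record later. A hedged sketch of how such a checkpoint could be replayed with boto3 (the stream name, function name and restart logic are assumptions, not part of the library):

import json

import boto3

def fetch_checkpointed_record(stream_name, checkpoint_json):
    """Fetch the Kinesis record referenced by a saved checkpoint."""
    checkpoint = json.loads(checkpoint_json)
    kinesis = boto3.client('kinesis')
    # position an iterator exactly at the checkpointed record
    iterator = kinesis.get_shard_iterator(
        StreamName=stream_name,
        ShardId=checkpoint['ShardId'],
        ShardIteratorType='AT_SEQUENCE_NUMBER',
        StartingSequenceNumber=checkpoint['SequenceNumber'])['ShardIterator']
    # each record's Data field holds the serialized fsm payload, which can
    # be used to restart the machine from the saved state
    return kinesis.get_records(ShardIterator=iterator, Limit=1)['Records']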
Example #3
 def get_sns_record(self):
     return {
         'eventSource': 'aws:sns',
         'Sns': {
             'Message':
             json.dumps({"mess": "age"}, **json_dumps_additional_kwargs())
         }
     }
Example #4
 def get_kinesis_record(self):
     return {
         'eventSource': 'aws:kinesis',
         'kinesis': {
             'data':
             base64.b64encode(
                 json.dumps({
                     'machine_name': 'barfoo'
                 }, **json_dumps_additional_kwargs()).encode('utf-8'))
         }
     }
Example #5
def lambda_step_handler(lambda_event, lambda_context):
    """
    AWS Lambda handler for executing state machines.

    :param lambda_event: a dict event from AWS Lambda
    :param lambda_context: a dict context from AWS Lambda
    :return: a dict event to pass along to AWS Step Functions orchestration
    """
    obj = {OBJ.SOURCE: AWS.STEP_FUNCTION, OBJ.LAMBDA_CONTEXT: lambda_context}
    payload = json.dumps(lambda_event, **json_dumps_additional_kwargs()
                         )  # Step Function just passes straight through
    return _process_payload_step(payload, obj)
Example #6
def search_for_machine(filename='fsm.yaml'):
    """
    Searches the .yaml hierarchy for the correct machine.

    :param filename: a path to a fsm.yaml file
    :return: None; prints the matching machine's configuration as JSON.
    """

    for machine_dict in get_current_configuration(
            filename=filename)[CONFIG.MACHINES]:
        if CONFIG.IMPORT in machine_dict:
            search_for_machine(filename=machine_dict[CONFIG.IMPORT])
            continue
        if machine_dict[CONFIG.NAME] == args.machine_name:
            data = output_machine_dict(machine_dict)
            print(json.dumps(data, indent=2, **json_dumps_additional_kwargs()))
            return
Example #7
def lambda_api_handler(lambda_event, lambda_context):
    """
    AWS Lambda handler for executing state machines.

    :param lambda_event: a dict event from AWS Lambda
    :param lambda_context: a dict context from AWS Lambda
    """
    try:
        obj = {OBJ.SOURCE: AWS.GATEWAY, OBJ.LAMBDA_CONTEXT: lambda_context}
        payload = json.dumps(lambda_event, **json_dumps_additional_kwargs()
                             )  # API Gateway just passes straight through
        _process_payload(payload, obj)

    # in batch mode, we don't want a single error to cause the entire batch
    # to retry. for that reason, we have opted to gobble all the errors here
    # and handle retries within the fsm dispatch code.
    except Exception:
        lambda_event = AWS_LAMBDA.REDACTED
        logger.exception('Critical error handling lambda: %s', lambda_event)
Example #8
    def _dispatch_to_current_state(self, event, obj):
        """
        Dispatches the event to the current state, then sends the next event
        onto Kinesis/DynamoDB for subsequent processing.

        :param event: a str event.
        :param obj: a dict.
        """
        # dispatch the event using the user context only
        next_event = self.current_state.dispatch(self, event, obj)

        # dispatch local transitions without enqueueing more messages
        while next_event \
                and self.current_state \
                and self.current_state.get_transition(next_event) \
                and self.current_state.get_transition(next_event).local:
            next_event = self.current_state.dispatch(self, next_event, obj)

        # if there are more events
        if next_event:

            # make a full copy
            ctx = Context.from_payload_dict(self.to_payload_dict())
            ctx.steps += 1
            ctx.retries = 0
            ctx.current_event = next_event
            serialized = json.dumps(ctx.to_payload_dict(),
                                    **json_dumps_additional_kwargs())

            # dispatch the next event to aws kinesis/dynamodb
            sent = self._send_next_event_for_dispatch(serialized, obj)

            # things are falling off the rails
            if not sent:
                self._queue_error(
                    ERRORS.DISPATCH,
                    'System error during dispatch. Failover to retry stream.')
                sent = self._send_next_event_for_dispatch(serialized,
                                                          obj,
                                                          recovering=True)

            obj[OBJ.SENT] = sent
Example #9
def start_state_machines(machine_name,
                         user_contexts,
                         correlation_ids=None,
                         current_state=STATE.PSEUDO_INIT,
                         current_event=STATE.PSEUDO_INIT,
                         additional_delay_seconds=0):
    """
    Insert bulk AWS SQS/Kinesis/SNS/DynamoDB/... messages that will kick off several state machines.

    :param machine_name: a str name for the machine to start.
    :param user_contexts: a list of dict of initial data for the state machines.
    :param correlation_ids: a list of guids for the fsms, or a list of Nones
      if the system should define them automatically.
    :param current_state: the state to start the machines in.
    :param current_event: the event to start the machines with.
    :param additional_delay_seconds: number of seconds to insert between state transitions
      (for streams that support delay)
    """
    all_data = []
    correlation_ids = correlation_ids or [
        uuid.uuid4().hex for i in range(len(user_contexts))
    ]
    for i, user_context in enumerate(user_contexts):
        correlation_id = correlation_ids[i]
        started_at = int(time.time())
        system_context = {
            SYSTEM_CONTEXT.STARTED_AT: started_at,
            SYSTEM_CONTEXT.MACHINE_NAME: machine_name,
            SYSTEM_CONTEXT.CURRENT_STATE: current_state,
            SYSTEM_CONTEXT.CURRENT_EVENT: current_event,
            SYSTEM_CONTEXT.STEPS: 0,
            SYSTEM_CONTEXT.RETRIES: 0,
            SYSTEM_CONTEXT.CORRELATION_ID: correlation_id,
            SYSTEM_CONTEXT.ADDITIONAL_DELAY_SECONDS: additional_delay_seconds
        }
        payload = {
            PAYLOAD.VERSION: PAYLOAD.DEFAULT_VERSION,
            PAYLOAD.SYSTEM_CONTEXT: system_context,
            PAYLOAD.USER_CONTEXT: user_context
        }
        all_data.append(json.dumps(payload, **json_dumps_additional_kwargs()))
    send_next_events_for_dispatch(None, all_data, correlation_ids)
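A hedged usage sketch for the bulk helper above (the machine name and user contexts are made up; omitting correlation_ids lets the system generate them):

# kick off three instances of a hypothetical 'image_processor' machine
start_state_machines(
    'image_processor',
    [{'image_id': 1}, {'image_id': 2}, {'image_id': 3}])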
Example #10
 def test_process_payload_step(self, mock_FSM):
     payload = json.dumps(
         {
             'system_context': {
                 'machine_name': 'barfoo',
                 'current_state': 'foobar',
                 'stream': 's',
                 'table': 't',
                 'topic': 'z',
                 'metrics': 'm'
             },
             'user_context': {}
         }, **json_dumps_additional_kwargs())
     obj = {}
     mock_FSM.return_value.create_FSM_instance.return_value\
         .system_context.return_value.get.return_value = 'pseudo-init'
     _process_payload_step(payload, obj)
     mock_FSM.return_value.create_FSM_instance.assert_called_with(
         'barfoo',
         initial_system_context={
             'topic': 'z',
             'machine_name': 'barfoo',
             'stream': 's',
             'current_state': 'foobar',
             'metrics': 'm',
             'table': 't'
         },
         initial_user_context={},
         initial_state_name='foobar')
     mock_FSM.return_value.create_FSM_instance.return_value.current_state.dispatch.assert_called_with(
         mock_FSM.return_value.create_FSM_instance.return_value,
         'pseudo-init', {
             'payload':
             '{"system_context": {"current_state": "foobar", "machine_name": '
             '"barfoo", "metrics": "m", "stream": "s", "table": "t", '
             '"topic": "z"}, "user_context": {}}'
         })
     self.assertEqual({'payload': payload}, obj)
Example #11
def start_state_machine(machine_name,
                        initial_context,
                        correlation_id=None,
                        current_state=STATE.PSEUDO_INIT,
                        current_event=STATE.PSEUDO_INIT,
                        additional_delay_seconds=0):
    """
    Insert an AWS SQS/Kinesis/SNS/DynamoDB/... message that will kick off a state machine.

    :param machine_name: a str name for the machine to start.
    :param initial_context: a dict of initial data for the state machine.
    :param correlation_id: the guid for the fsm, or None if the system should
      define it automatically.
    :param current_state: the state to start the machine in.
    :param current_event: the event to start the machine with.
    :param additional_delay_seconds: number of seconds to insert between state transitions
      (for streams that support delay)
    """
    correlation_id = correlation_id or uuid.uuid4().hex
    system_context = {
        SYSTEM_CONTEXT.STARTED_AT: int(time.time()),
        SYSTEM_CONTEXT.MACHINE_NAME: machine_name,
        SYSTEM_CONTEXT.CURRENT_STATE: current_state,
        SYSTEM_CONTEXT.CURRENT_EVENT: current_event,
        SYSTEM_CONTEXT.STEPS: 0,
        SYSTEM_CONTEXT.RETRIES: 0,
        SYSTEM_CONTEXT.CORRELATION_ID: correlation_id,
        SYSTEM_CONTEXT.ADDITIONAL_DELAY_SECONDS: additional_delay_seconds
    }
    payload = {
        PAYLOAD.VERSION: PAYLOAD.DEFAULT_VERSION,
        PAYLOAD.SYSTEM_CONTEXT: system_context,
        PAYLOAD.USER_CONTEXT: initial_context
    }
    send_next_event_for_dispatch(
        None, json.dumps(payload, **json_dumps_additional_kwargs()),
        correlation_id)
Example #12
try:
    client.start(container=container)
    stdout = client.logs(container, stdout=True, stream=True)
    for line in stdout:
        sys.stdout.write(line)
    stderr = client.logs(container, stderr=True, stream=True)
    for line in stderr:
        sys.stderr.write(line)
    return_code = client.wait(container)

except Exception:
    logging.exception('')
    raise

finally:

    if not environment:
        sys.stderr.write(FATAL_ENVIRONMENT_ERROR)
        sys.exit(1)

    # FSM_CONTEXT is the environment variable used by aws_lambda_fsm.utils.ECSTaskEntryAction
    event = DONE_EVENT if return_code == 0 else FAIL_EVENT
    payload_encoded = environment[ENVIRONMENT.FSM_CONTEXT]
    payload = json.loads(base64.b64decode(payload_encoded), **json_loads_additional_kwargs())
    payload[PAYLOAD.SYSTEM_CONTEXT][SYSTEM_CONTEXT.CURRENT_EVENT] = event
    serialized = json.dumps(payload, **json_dumps_additional_kwargs())
    send_next_event_for_dispatch(
        None,
        serialized,
        payload[PAYLOAD.SYSTEM_CONTEXT][SYSTEM_CONTEXT.CORRELATION_ID]
    )
Example #13
 def json_dumps_additional_kwargs_using_settings(self, mock_settings):
     mock_settings.JSON_DUMPS_ADDITIONAL_KWARGS = {'default': lambda x: "foobar"}
     self.assertEquals({'sort_keys', 'default'}, set(json_dumps_additional_kwargs().keys()))
     self.assertEquals("foobar", json_dumps_additional_kwargs()['default']('~~~'))
Example #14
 def get_sqs_record(self):
     return {
         'eventSource': 'aws:sqs',
         'body': json.dumps({"mess": "age"},
                            **json_dumps_additional_kwargs())
     }
Example #15
 def increment_error_counters(self, data, dimensions):
     self.errors.send(
         json.dumps((data, dimensions), **json_dumps_additional_kwargs()))
     return {'test': 'stub'}
Example #16
            # create the lambda event
            lambda_event = {AWS_LAMBDA.Records: []}

            # populate the lambda event
            for sqs_message in sqs_messages:
                body = sqs_message[AWS_SQS.MESSAGE.Body]
                tmp = {
                    AWS_LAMBDA.EventSource: AWS_LAMBDA.EVENT_SOURCE.SQS,
                    AWS_LAMBDA.SQS_RECORD.BODY: body
                }
                lambda_event[AWS_LAMBDA.Records].append(tmp)

            # and call the handler with the records
            if args.lambda_command:
                serialized = json.dumps(lambda_event,
                                        **json_dumps_additional_kwargs())
                quoted = shellquote(serialized)
                subprocess.call(
                    ['/bin/bash', '-c', args.lambda_command + " " + quoted])
            else:
                lambda_handler(lambda_event, lambda_context)

            # after processing, the SQS messages need to be deleted
            response = sqs_conn.delete_message_batch(
                QueueUrl=sqs_queue_url,
                Entries=[{
                    AWS_SQS.MESSAGE.Id:
                    str(i),
                    AWS_SQS.MESSAGE.ReceiptHandle:
                    sqs_message[AWS_SQS.MESSAGE.ReceiptHandle]
                } for i, sqs_message in enumerate(sqs_messages)])
Example #17
 def test_json_dumps_additional_kwargs_defaults(self):
     self.assertEquals({'sort_keys', 'default'}, set(json_dumps_additional_kwargs().keys()))
     self.assertEquals("<not_serializable>", json_dumps_additional_kwargs()['default']('~~~'))
Example #18
    def execute(self, context, obj):
        """
        Action that launches an ECS task.

        The API for using this class is as follows:

        {
           'context_var': 'context_value',              # normal context variable
           'task_details': {                            # dictionary of all the states that run images
              'state_name_1': {                         # first state name (as in fsm.yaml)
                                                        # cluster to run image for state_name_1
                'cluster_arn': 'arn:aws:ecs:region:1234567890:cluster/foobar',
                'container_image': 'host/corp/image:12345' # image for state_name_1
              },
              'state_name_2': {                         # second state name (as in fsm.yaml)
                'cluster_arn': 'arn:aws:ecs:eu-west-1:1234567890:cluster/foobar',
                'container_image': 'host/corp/image:12345',
                'runner_task_definition': 'my_runner',  # alternative docker image runner task name
                'runner_container_name': 'my_runner'    # alternative docker image runner container name
              }
            },
            'clone_aws_credentials': True               # flag to copy aws creds from local environment
                                                        # to the container overrides - makes for easier
                                                        # local testing. alternatively, just add permanent
                                                        # credentials to your runner task.
        }

        :param context: an aws_lambda_fsm.fsm.Context instance
        :param obj: a dict
        :return: a string event, or None
        """

        # construct a version of the context that can be base64 encoded
        # and stuffed into an environment variable for the container program.
        # all the container program needs to do is extract this data, add
        # an event, and send the message onto sqs/kinesis/... since this is an
        # ENTRY action, we inspect the current transition for the state we
        # will be in AFTER this code executes.
        ctx = Context.from_payload_dict(context.to_payload_dict())
        ctx.current_state = context.current_transition.target
        ctx.steps += 1
        fsm_context = base64.b64encode(
            json.dumps(ctx.to_payload_dict(),
                       **json_dumps_additional_kwargs()))

        # now finally launch the ECS task using all the data from above
        # as well as tasks etc. specified when the state machine was run.
        state_to_task_details_map = context[TASK_DETAILS_KEY]
        task_details = state_to_task_details_map[
            context.current_transition.target.name]

        # this is the image the user wants to run
        cluster_arn = task_details[CLUSTER_ARN_KEY]
        container_image = task_details[CONTAINER_IMAGE_KEY]

        # this is the task that will run that image
        task_definition = task_details.get(RUNNER_TASK_DEFINITION_KEY,
                                           DEFAULT_RUNNER_TASK_NAME)
        container_name = task_details.get(RUNNER_CONTAINER_NAME_KEY,
                                          DEFAULT_RUNNER_CONTAINER_NAME)

        # setup the environment for the ECS task. this first set of variables
        # is used by the docker container runner image.
        environment = {
            ENVIRONMENT.FSM_CONTEXT: fsm_context,
            ENVIRONMENT.FSM_DOCKER_IMAGE: container_image
        }
        # this second set of variables is used by the actual docker image that
        # does the real work (pdf processing etc.)
        for name, value in task_details.get(ENVIRONMENT_KEY, {}).items():
            environment[name] = value

        # store the environment and record the guid.
        guid, _ = store_environment(context, environment)

        # stuff the guid and a couple of stream settings into the task
        # overrides. the guid allows the FSM_CONTEXT to be loaded from
        # storage, and FSM_PRIMARY_STREAM_SOURCE allows the call to
        # send_next_event_for_dispatch to succeed.
        env = [{
            AWS_ECS.CONTAINER_OVERRIDES.ENVIRONMENT.NAME:
            ENVIRONMENT.FSM_ENVIRONMENT_GUID_KEY,
            AWS_ECS.CONTAINER_OVERRIDES.ENVIRONMENT.VALUE: guid
        }, {
            AWS_ECS.CONTAINER_OVERRIDES.ENVIRONMENT.NAME:
            ENVIRONMENT.FSM_PRIMARY_STREAM_SOURCE,
            AWS_ECS.CONTAINER_OVERRIDES.ENVIRONMENT.VALUE:
            get_primary_stream_source() or ''
        }, {
            AWS_ECS.CONTAINER_OVERRIDES.ENVIRONMENT.NAME:
            ENVIRONMENT.FSM_SECONDARY_STREAM_SOURCE,
            AWS_ECS.CONTAINER_OVERRIDES.ENVIRONMENT.VALUE:
            get_secondary_stream_source() or ''
        }]

        # this is for local testing
        if context.get(CLONE_AWS_CREDENTIALS_KEY):
            _testing(env)

        # get an ECS connection and start a task.
        conn = get_connection(cluster_arn)

        # run the task
        conn.run_task(cluster=cluster_arn,
                      taskDefinition=task_definition,
                      overrides={
                          AWS_ECS.CONTAINER_OVERRIDES.KEY: [{
                              AWS_ECS.CONTAINER_OVERRIDES.CONTAINER_NAME:
                              container_name,
                              AWS_ECS.CONTAINER_OVERRIDES.ENVIRONMENT.KEY:
                              env
                          }]
                      })

        # entry actions do not return events
        return None
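Tying this back to Example #11, a hedged sketch of starting a machine whose state runs an ECS task via this entry action (the machine name, state name, cluster ARN and image are all placeholders):

initial_context = {
    'task_details': {
        'process_pdf': {  # state name as it appears in fsm.yaml
            'cluster_arn': 'arn:aws:ecs:us-east-1:123456789012:cluster/foobar',
            'container_image': 'host/corp/pdf-worker:12345'
        }
    }
}
start_state_machine('pdf_pipeline', initial_context)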
Example #19
 def test_custom_encoder(self, mock_settings):
     mock_settings.JSON_DUMPS_ADDITIONAL_KWARGS = {'cls': Encoder}
     self.assertEquals('B', json.dumps("A", **json_dumps_additional_kwargs()))
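For reference, a minimal Encoder stub that would satisfy the assertion above (the real test fixture may differ):

import json

class Encoder(json.JSONEncoder):
    """Stub encoder for the test: ignores the input and always returns 'B'."""

    def encode(self, obj):
        return 'B'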