# Example no. 1
class FSMContext(dict):
    """ A finite state machine context instance. """
    def __init__(self, initialState, currentState=None, machineName=None, instanceName=None,
                 retryOptions=None, url=None, queueName=None, data=None, contextTypes=None,
                 method='GET', persistentLogging=False, obj=None, headers=None, globalTaskTarget=None,
        """ Constructor
        @param initialState: a State instance 
        @param currentState: a State instance
        @param machineName: the name of the fsm
        @param instanceName: the instance name of the fsm
        @param retryOptions: the TaskRetryOptions for the machine
        @param url: the url of the fsm  
        @param queueName: the name of the appengine task queue 
        @param headers: a dict of X-Fantasm request headers to pass along in Tasks 
        @param persistentLogging: if True, use persistent _FantasmLog model
        @param obj: an object that the FSMContext can operate on  
        @param globalTaskTarget: the machine-level target configuration parameter
        assert queueName
        super(FSMContext, self).__init__(data or {})
        self.initialState = initialState
        self.currentState = currentState
        self.currentAction = None
        if currentState:
            self.currentAction = currentState.exitAction 
        self.machineName = machineName
        self.instanceName = instanceName or self._generateUniqueInstanceName()
        self.queueName = queueName
        self.retryOptions = retryOptions
        self.url = url
        self.method = method
        self.startingEvent = None
        self.startingState = None
        self.contextTypes = constants.PARAM_TYPES.copy()
        if contextTypes:
        self.logger = Logger(self, obj=obj, persistentLogging=persistentLogging)
        self.__obj = obj
        self.headers = headers
        self.globalTaskTarget = globalTaskTarget
        self.useRunOnceSemaphore = useRunOnceSemaphore
        # the following is monkey-patched from for 'immediate mode'
        from google.appengine.api.taskqueue.taskqueue import Queue
        self.Queue = Queue # pylint: disable-msg=C0103
    def _generateUniqueInstanceName(self):
        """ Generates a unique instance name for this machine.

        The name has the form '<machineName>-<UTC timestamp as YYYYmmddHHMMSS>-<6 random chars>',
        so self.machineName must already be set when this is called.

        @return: a FSMContext instanceName that is (pretty darn likely to be) unique
        """
        utcnow = datetime.datetime.utcnow()
        dateStr = utcnow.strftime('%Y%m%d%H%M%S')
        # random.sample() draws without replacement from constants.CHARS_FOR_RANDOM
        randomStr = ''.join(random.sample(constants.CHARS_FOR_RANDOM, 6))
        return '%s-%s-%s' % (self.machineName, dateStr, randomStr)
    def putTypedValue(self, key, value):
        """ Sets a value on context[key], but casts the value according to self.contextTypes.

        @param key: the context key; it must have an entry in self.contextTypes (KeyError otherwise)
        @param value: the raw value, or a list of raw values, to cast and store
        """

        # cast the value to the appropriate type TODO: should this be in FSMContext?
        cast = self.contextTypes[key]
        kwargs = {}
        if cast is simplejson.loads:
            kwargs = {'object_hook': models.decode}
        if cast is pickle.loads:
            # NOTE(review): unpickling is only safe for trusted input - confirm where value originates
            value = pickle.loads(str(value))
        elif isinstance(value, list):
            # lists are cast element by element
            value = [cast(v, **kwargs) for v in value]
        else:
            # scalar values are cast directly; without this 'else' the already-cast list above
            # would be handed to cast() a second time as a whole
            value = cast(value, **kwargs)

        # update the context
        self[key] = value
    def generateInitializationTask(self, countdown=0, taskName=None):
        """ Generates a task for initializing the machine. """
        assert == FSM.PSEUDO_INIT
        url = self.buildUrl(self.currentState, FSM.PSEUDO_INIT)
        params = self.buildParams(self.currentState, FSM.PSEUDO_INIT)
        taskName = taskName or self.getTaskName(FSM.PSEUDO_INIT)
        transition = self.currentState.getTransition(FSM.PSEUDO_INIT)
        task = Task(name=taskName, 
        return task
    def fork(self, data=None):
        """ Forks the FSMContext. 
        When an FSMContext is forked, an identical copy of the finite state machine is generated
        that will have the same event dispatched to it as the machine that called .fork(). The data
        parameter is useful for allowing each forked instance to operate on a different bit of data.
        @param data: an option mapping of data to apply to the forked FSMContext 
        obj = self.__obj
        if obj.get(constants.FORKED_CONTEXTS_PARAM) is None:
            obj[constants.FORKED_CONTEXTS_PARAM] = []
        forkedContexts = obj.get(constants.FORKED_CONTEXTS_PARAM)
        data = copy.copy(data) or {}
        data[constants.FORK_PARAM] = len(forkedContexts)
    def spawn(self, machineName, contexts, countdown=0, method='POST',
              _currentConfig=None, taskName=None):
        """ Spawns new machines.

        Delegates to startStateMachine(...), passing along this context's headers.

        @param machineName the machine to spawn
        @param contexts a list of contexts (dictionaries) to spawn the new machine(s) with; multiple contexts will spawn
                        multiple machines
        @param countdown the countdown (in seconds) to wait before spawning machines
        @param method the method ('GET' or 'POST') to invoke the machine with (default: POST)
        @param _currentConfig test injection for configuration
        @param taskName used for idempotency; will become the root of the task name for the actual task queued
        """
        # using the current task name as a root to startStateMachine will make this idempotent
        taskName = taskName or self.__obj[constants.TASK_NAME_PARAM]
        startStateMachine(machineName, contexts, taskName=taskName, method=method, countdown=countdown,
                          _currentConfig=_currentConfig, headers=self.headers)
    def initialize(self):
        """ Initializes the FSMContext. Queues a Task (so that we can benefit from auto-retry) to dispatch
        an event and take the machine from 'pseudo-init' into the state machine's initial state, as 
        defined in the fsm.yaml file.
        @param data: a dict of initial key, value pairs to stuff into the FSMContext
        @return: an event string to dispatch to the FSMContext to put it into the initialState 
        self[constants.STEPS_PARAM] = 0
        task = self.generateInitializationTask()
        key = db.Key.from_path(_FantasmInstance.kind(), self.instanceName, namespace='')
        _FantasmInstance(key=key, instanceName=self.instanceName).put()
        return FSM.PSEUDO_INIT
    def dispatch(self, event, obj):
        """ The main entry point to move the machine according to an event. 
        @param event: a string event to dispatch to the FSMContext
        @param obj: an object that the FSMContext can operate on  
        @return: an event string to dispatch to the FSMContext
        self.__obj = self.__obj or obj # hold the obj object for use during this context

        # store the starting state and event for the handleEvent() method
        self.startingState = self.currentState
        self.startingEvent = event

        nextEvent = None
            nextEvent = self.currentState.dispatch(self, event, obj)
            if obj.get(constants.FORKED_CONTEXTS_PARAM):
                # pylint: disable-msg=W0212
                # - accessing the protected method is fine here, since it is an instance of the same class
                tasks = []
                for context in obj[constants.FORKED_CONTEXTS_PARAM]:
                    context[constants.STEPS_PARAM] = int(context.get(constants.STEPS_PARAM, '0')) + 1
                    task = context.queueDispatch(nextEvent, queue=False)
                    if task: # fan-in magic
                        if not task.was_enqueued: # fan-in always queues
                    if tasks:
                        transition = self.currentState.getTransition(nextEvent)
                        _queueTasks(self.Queue, transition.queueName, tasks)
                except (TaskAlreadyExistsError, TombstonedTaskError):
                    # unlike a similar block in self.continutation, this is well off the happy path
                                     'Unable to queue fork Tasks %s as it/they already exists. (Machine %s, State %s)',
                                     [ for task in tasks if not task.was_enqueued],
            if nextEvent:
                self[constants.STEPS_PARAM] = int(self.get(constants.STEPS_PARAM, '0')) + 1
                except (TaskAlreadyExistsError, TombstonedTaskError):
                    # unlike a similar block in self.continutation, this is well off the happy path
                    # FIXME: when this happens, it means there was failure shortly after queuing the Task, or
                    #        possibly even with queuing the Task. when this happens there is a chance that 
                    #        two states in the machine are executing simultaneously, which is may or may not
                    #        be a good thing, depending on what each state does. gracefully handling this 
                    #        exception at least means that this state will terminate.
                    self.logger.critical('Unable to queue next Task as it already exists. (Machine %s, State %s)',
                # if we're not in a final state, emit a log message
                # FIXME - somehow we should avoid this message if we're in the "last" step of a continuation...
                if not self.currentState.isFinalState and not obj.get(constants.TERMINATED_PARAM):
                    self.logger.critical('Non-final state did not emit an event. Machine has terminated in an ' +
                                     'unknown state. (Machine %s, State %s)' %
                # if it is a final state, then dispatch the pseudo-final event to finalize the state machine
                elif self.currentState.isFinalState and self.currentState.exitAction:
                    self[constants.STEPS_PARAM] = int(self.get(constants.STEPS_PARAM, '0')) + 1
        except Exception:
            self.logger.exception("FSMContext.dispatch is handling the following exception:")
            self._handleException(event, obj)
        return nextEvent
    def continuation(self, nextToken):
        """ Performs a continuation be re-queueing an FSMContext Task with a slightly modified continuation
        token. self.startingState and self.startingEvent are used in the re-queue, so this can be seen as a
        'fork' of the current context.
        @param nextToken: the next continuation token
        assert not self.get(constants.INDEX_PARAM) # fan-out after fan-in is not allowed
        step = str(self[constants.STEPS_PARAM]) # needs to be a str key into a json dict
        # make a copy and set the currentState to the startingState of this context
        context = self.clone()
        context.currentState = self.startingState
        # update the generation and continuation params
        gen = context.get(constants.GEN_PARAM, {})
        gen[step] = gen.get(step, 0) + 1
        context[constants.GEN_PARAM] = gen
        context[constants.CONTINUATION_PARAM] = nextToken
            # pylint: disable-msg=W0212
            # - accessing the protected method is fine here, since it is an instance of the same class
            transition = self.startingState.getTransition(self.startingEvent)
            context._queueDispatchNormal(self.startingEvent, queue=True, queueName=transition.queueName,
                                         retryOptions=transition.retryOptions, taskTarget=transition.taskTarget)
        except (TaskAlreadyExistsError, TombstonedTaskError):
            # this can happen when currentState.dispatch() previously succeeded in queueing the continuation
            # Task, but failed with the doAction.execute() call in a _previous_ execution of this Task.
            # NOTE: this prevent the dreaded "fork bomb" 
  'Unable to queue continuation Task as it already exists. (Machine %s, State %s)',
    def queueDispatch(self, nextEvent, queue=True):
        """ Queues a .dispatch(nextEvent) call in the appengine Task queue. 
        @param nextEvent: a string event 
        @param queue: a boolean indicating whether or not to queue a Task, or leave it to the caller 
        @return: a taskqueue.Task instance which may or may not have been queued already
        assert nextEvent is not None
        # self.currentState is already transitioned away from self.startingState
        transition = self.currentState.getTransition(nextEvent)
        queueName = self.__obj.get(constants.QUEUE_NAME_PARAM) or transition.queueName
            task = self._queueDispatchFanIn(nextEvent,,
                                            queueName=queueName, taskTarget=transition.taskTarget)
            task = self._queueDispatchNormal(nextEvent, queue=queue, countdown=transition.countdown,
                                             queueName=queueName, taskTarget=transition.taskTarget)
        return task
    def _queueDispatchNormal(self, nextEvent, queue=True, countdown=0, retryOptions=None, queueName=None,
        """ Queues a call to .dispatch(nextEvent) in the appengine Task queue. 
        @param nextEvent: a string event 
        @param queue: a boolean indicating whether or not to queue a Task, or leave it to the caller 
        @param countdown: the number of seconds to countdown before the queued task fires
        @param retryOptions: the RetryOptions for the task
        @param queueName: the queue name to Queue into 
        @param taskTarget: the task target parameter
        @return: a taskqueue.Task instance which may or may not have been queued already
        assert nextEvent is not None
        assert queueName
        url = self.buildUrl(self.currentState, nextEvent)
        params = self.buildParams(self.currentState, nextEvent)
        taskName = self.getTaskName(nextEvent)
        task = Task(name=taskName, method=self.method, url=url, params=params, countdown=countdown,
                    retry_options=retryOptions, headers=self.headers, target=taskTarget)
        if queue:
            if not task.was_enqueued:
                self.logger.critical('Task "%s" was not enqueued.', taskName)
        return task
    def _queueDispatchFanIn(self, nextEvent, fanInPeriod=0, retryOptions=None, queueName=None, taskTarget=None):
        """ Queues a call to .dispatch(nextEvent) in the task queue, or saves the context to the 
        datastore for processing by the queued .dispatch(nextEvent)
        @param nextEvent: a string event 
        @param fanInPeriod: the period of time between fan in Tasks 
        @param queueName: the queue name to Queue into 
        @param taskTarget: the task target parameter
        @return: a taskqueue.Task instance which may or may not have been queued already
        assert nextEvent is not None
        assert not self.get(constants.INDEX_PARAM) # fan-in after fan-in is not allowed
        assert queueName
        # we pop this off here because we do not want the fan-out/continuation param as part of the
        # task name, otherwise we loose the fan-in - each fan-in gets one work unit.
        self.pop(constants.GEN_PARAM, None)
        fork = self.pop(constants.FORK_PARAM, None)
        # transfer the fan-in-group into the context (under a fixed value key) so that states beyond 
        # the fan-in get unique Task names
        # FIXME: this will likely change once we formalize what to do post fan-in
        transition = self.currentState.getTransition(nextEvent)
        if self.get( is not None:
            self[constants.FAN_IN_GROUP_PARAM] = self[]
        taskNameBase = self.getTaskName(nextEvent, fanIn=True)
        rwlock = ReadWriteLock(taskNameBase, self)
        index = rwlock.currentIndex()
        # (***)
        # grab the lock - memcache.incr()
        # on Task retry, multiple incr() calls are possible. possible ways to handle:
        # 1. release the lock in a 'finally' clause, but then risk missing a work
        #    package because acquiring the read lock will succeed even though the
        #    work package was not written yet.
        # 2. allow the lock to get too high. the fan-in logic attempts to wait for 
        #    work packages across multiple-retry attempts, so this seems like the 
        #    best option. we basically trade a bit of latency in fan-in for reliability.
        rwlock.acquireWriteLock(index, nextEvent=nextEvent)
        # insert the work package, which is simply a serialized FSMContext
        workIndex = '%s-%d' % (taskNameBase, knuthHash(index))
        # on retry, we want to ensure we get the same work index for this task
        actualTaskName = self.__obj[constants.TASK_NAME_PARAM]
        indexKeyName = 'workIndex-' + '-'.join([str(i) for i in [actualTaskName, fork] if i]) or None
        semaphore = RunOnceSemaphore(indexKeyName, self)
        # check if the workIndex changed during retry
        semaphoreWritten = False
        if self.__obj[constants.RETRY_COUNT_PARAM] > 0:
            # see comment (A) in self._queueDispatchFanIn(...)
            payload = semaphore.readRunOnceSemaphore(payload=workIndex, transactional=False)
            if payload:
                semaphoreWritten = True
                if payload != workIndex:
          "Work index changed from '%s' to '%s' on retry.", payload, workIndex)
                    workIndex = payload
        # update this here so it gets written down into the work package too
        self[constants.INDEX_PARAM] = index
        # write down two models, one actual work package, one idempotency package
        keyName = '-'.join([str(i) for i in [actualTaskName, fork] if i]) or None
        key = db.Key.from_path(_FantasmFanIn.kind(), keyName, namespace='')
        work = _FantasmFanIn(context=self, workIndex=workIndex, key=key)
        # close enough to idempotent, but could still write only one of the entities
        # FIXME: could be made faster using a bulk put, but this interface is cleaner
        if not semaphoreWritten:
            semaphore.writeRunOnceSemaphore(payload=workIndex, transactional=False)
        # put the work item
        # (A) now the datastore is asynchronously writing the indices, so the work package may
        #     not show up in a query for a period of time. there is a corresponding time.sleep()
        #     in the fan-in of self.mergeJoinDispatch(...) 
        # release the lock - memcache.decr()
            # insert a task to run in the future and process a bunch of work packages
            now = time.time()
            url = self.buildUrl(self.currentState, nextEvent)
            params = self.buildParams(self.currentState, nextEvent)
            taskName = '%s-%d' % (taskNameBase, index)
            task = Task(name=taskName,
                        eta=datetime.datetime.utcfromtimestamp(now) + datetime.timedelta(seconds=fanInPeriod),
            if not task.was_enqueued:
                self.logger.critical('Task "%s" was not enqueued.', taskName)
            return task
        except (TaskAlreadyExistsError, TombstonedTaskError):
            pass # Fan-in magic
    def mergeJoinDispatch(self, event, obj):
        """ Performs a merge join on the pending fan-in dispatches.
        @param event: an event that is being merge joined (destination state must be a fan in) 
        @return: a list (possibly empty) of FSMContext instances
        # this assertion comes from _queueDispatchFanIn - we never want fan-out info in a fan-in context
        assert not self.get(constants.GEN_PARAM)
        assert not self.get(constants.FORK_PARAM)
        # the work package index is stored in the url of the Task/FSMContext
        index = self.get(constants.INDEX_PARAM)
        self.logger.debug('Index: %s', index)
        taskNameBase = self.getTaskName(event, fanIn=True)
        # see comment (***) in self._queueDispatchFanIn 
        # in the case of failing to acquire a read lock (due to failed release of write lock)
        # we have decided to keep retrying
        raiseOnFail = False
        if self._getTaskRetryLimit() is not None:
            raiseOnFail = (self._getTaskRetryLimit() > self.__obj[constants.RETRY_COUNT_PARAM])
        rwlock = ReadWriteLock(taskNameBase, self)
        rwlock.acquireReadLock(index, raiseOnFail=raiseOnFail)
        # and return the FSMContexts list
        class FSMContextList(list):
            """ A list that supports, .logger.warning() etc.for fan-in actions """
            def __init__(self, context, contexts, guarded=False):
                """ setup a self.logger for fan-in actions """
                super(FSMContextList, self).__init__(contexts)
                self.logger = Logger(context)
                self.instanceName = context.instanceName
                self.guarded = guarded
        # see comment (A) in self._queueDispatchFanIn(...)
        # the following step ensure that fan-in only ever operates one time over a list of data
        # the entity is created in State.dispatch(...) _after_ all the actions have executed
        # successfully
        khash = knuthHash(index)
        self.logger.debug('knuthHash of index: %s', khash)
        workIndex = '%s-%d' % (taskNameBase, khash)
        if obj[constants.RETRY_COUNT_PARAM] > 0:
            semaphore = RunOnceSemaphore(workIndex, self)
            if semaphore.readRunOnceSemaphore(payload=self.__obj[constants.TASK_NAME_PARAM]):
      "Fan-in idempotency guard for workIndex '%s', not processing any work items.", 
                return FSMContextList(self, [], guarded=True) # don't operate over the data again
        # fetch all the work packages in the current group for processing
        query = _FantasmFanIn.all(namespace='') \
                             .filter('workIndex =', workIndex) \
        # construct a list of FSMContexts
        contexts = [self.clone(replaceData=r.context) for r in query]
        return FSMContextList(self, contexts)
    def _getTaskRetryLimit(self):
        """ Method that returns the maximum number of retries for this particular dispatch 
        @param obj: an object that the FSMContext can operate on  
        # get task_retry_limit configuration
            transition = self.startingState.getTransition(self.startingEvent)
            taskRetryLimit = transition.retryOptions.task_retry_limit
        except UnknownEventError:
            # can't find the transition, use the machine-level default
            taskRetryLimit = self.retryOptions.task_retry_limit
        return taskRetryLimit
    def _handleException(self, event, obj):
        """ Method for child classes to override to handle exceptions. 
        @param event: a string event 
        @param obj: an object that the FSMContext can operate on  
        retryCount = obj.get(constants.RETRY_COUNT_PARAM, 0)
        taskRetryLimit = self._getTaskRetryLimit()
        if taskRetryLimit and retryCount >= taskRetryLimit:
            # need to permanently fail
            self.logger.critical('Max-requeues reached. Machine has terminated in an unknown state. ' +
                             '(Machine %s, State %s, Event %s)',
                             self.machineName,, event, exc_info=True)
            # re-raise, letting App Engine TaskRetryOptions kill the task
            # re-raise the exception
            self.logger.warning('Exception occurred processing event. Task will be retried. ' +
                            '(Machine %s, State %s)',
                            self.machineName,, exc_info=True)
            # this line really just allows unit tests to work - the request is really dead at this point
            self.currentState = self.startingState
    def buildUrl(self, state, event):
        """ Builds the taskqueue url. 
        @param state: the State to dispatch to
        @param event: the event to dispatch
        @return: a url that can be used to build a taskqueue.Task instance to .dispatch(event)
        assert state and event
        return self.url + '%s/%s/%s/' % (, 
    def buildParams(self, state, event):
        """ Builds the taskqueue params. 
        @param state: the State to dispatch to
        @param event: the event to dispatch
        @return: a dict suitable to use in constructing a url (GET) or using as params (POST)
        assert state and event
        params = {constants.STATE_PARAM:, 
                  constants.EVENT_PARAM: event,
                  constants.INSTANCE_NAME_PARAM: self.instanceName}
        for key, value in self.items():
            if key not in constants.NON_CONTEXT_PARAMS:
                if self.contextTypes.get(key) is simplejson.loads:
                    value = simplejson.dumps(value, cls=models.Encoder)
                if self.contextTypes.get(key) is pickle.loads:
                    value = pickle.dumps(value)
                if isinstance(value, datetime.datetime):
                    value = str(int(time.mktime(value.utctimetuple())))
                if isinstance(value, dict):
                    # FIXME: should we issue a warning that they should update fsm.yaml?
                    value = simplejson.dumps(value, cls=models.Encoder)
                valueIsNotBasestring = False
                if isinstance(value, (list, tuple)):
                    for v in value:
                        if not isinstance(v, basestring):
                            valueIsNotBasestring = True
                elif not isinstance(value, basestring):
                    valueIsNotBasestring = True
                if valueIsNotBasestring:
                    if key not in self.contextTypes.keys():
                        self.logger.warning("Attempting to put an object in the FSMContext without specifying an "
                                            "entry for key '%s' in 'context_types' in the yaml for machineName '%s'. "
                                            "There will likely be conversion issues (ie. booleans turned into "
                                            "strings).", key, self.machineName)
                if isinstance(value, (list, tuple)) and len(value) == 1:
                    key = key + '[]' # used to preserve lists of length=1 - see for inverse

                params[key] = value
        return params

    def getTaskName(self, nextEvent, instanceName=None, fanIn=False):
        """ Returns a task name that is unique for a specific dispatch

        The name is the '--'-joined list of: the instance name, one 'continuation-<step>-<gen>' part
        per entry of the generation param, an optional 'fork-<n>' part, an optional 'work-index-<n>'
        part (skipped when fanIn is True), a 'step-<n>' part and an optional 'group-<g>' part.

        @param nextEvent: the event to dispatch
        @param instanceName: optional instance name to use instead of self.instanceName
        @param fanIn: if True, the work index is left out of the task name
        @return: a task name that can be used to build a taskqueue.Task instance to .dispatch(nextEvent)
        """
        # NOTE(review): 'transition' is not used by any line visible below. The lookup is kept
        # unchanged; lines that consumed it may be missing from this method - confirm.
        transition = self.currentState.getTransition(nextEvent)
        parts = []
        parts.append(instanceName or self.instanceName)
        if self.get(constants.GEN_PARAM):
            for (step, gen) in self[constants.GEN_PARAM].items():
                parts.append('continuation-%s-%s' % (step, gen))
        if self.get(constants.FORK_PARAM):
            parts.append('fork-' + str(self[constants.FORK_PARAM]))
        # post-fan-in we need to store the workIndex in the task name to avoid duplicates, since
        # we popped the generation off during fan-in
        # FIXME: maybe not pop the generation in fan-in?
        # FIXME: maybe store this in the instanceName?
        # FIXME: i wish this was easier to get right :-)
        if (not fanIn) and self.get(constants.INDEX_PARAM):
            parts.append('work-index-' + str(self[constants.INDEX_PARAM]))
        parts.append('step-' + str(self[constants.STEPS_PARAM]))
        if self.get(constants.FAN_IN_GROUP_PARAM) is not None:
            parts.append('group-' + str(self[constants.FAN_IN_GROUP_PARAM]))
        return '--'.join(parts)
    def clone(self, instanceName=None, updateData=None, replaceData=None):
        """ Returns a copy of the FSMContext.
        @param instanceName: the instance name to optionally apply to the clone
        @param updateData: a dict/mapping of data to optionally apply (.update()) to the clone
        @param replaceData: a dict/mapping of data to optionally apply (.clear()/.update()) to the clone
        @return: a new FSMContext instance
        assert (not updateData) or (not replaceData), "cannot update and replace data at the same time"

        #context = copy.deepcopy(self)
        # shallow copy the context
        context = copy.copy(self)
        # deepcopy the dictionary portion of the context
        deepcopy_dict = copy.deepcopy(dict(self))

        if instanceName:
            context.instanceName = instanceName
        if updateData:
        if replaceData:
        return context
