Example #1
    def __init__(self,
                 initialState,
                 currentState=None,
                 machineName=None,
                 instanceName=None,
                 retryOptions=None,
                 url=None,
                 queueName=None,
                 data=None,
                 contextTypes=None,
                 method='GET',
                 persistentLogging=False,
                 obj=None,
                 headers=None):
        """ Constructor
        
        @param initialState: a State instance 
        @param currentState: a State instance
        @param machineName: the name of the fsm
        @param instanceName: the instance name of the fsm
        @param retryOptions: the TaskRetryOptions for the machine
        @param url: the url of the fsm  
        @param queueName: the name of the appengine task queue 
        @param headers: a dict of X-Fantasm request headers to pass along in Tasks 
        @param persistentLogging: if True, use persistent _FantasmLog model
        @param obj: an object that the FSMContext can operate on  
        """
        assert queueName

        super(FSMContext, self).__init__(data or {})
        self.initialState = initialState
        self.currentState = currentState
        self.currentAction = None
        if currentState:
            self.currentAction = currentState.exitAction
        self.machineName = machineName
        self.instanceName = instanceName or self._generateUniqueInstanceName()
        self.queueName = queueName
        self.retryOptions = retryOptions
        self.url = url
        self.method = method
        self.startingEvent = None
        self.startingState = None
        self.contextTypes = constants.PARAM_TYPES.copy()
        if contextTypes:
            self.contextTypes.update(contextTypes)
        self.logger = Logger(self,
                             obj=obj,
                             persistentLogging=persistentLogging)
        self.__obj = obj
        self.headers = headers

        # the following is monkey-patched from handler.py for 'immediate mode'
        from google.appengine.api.taskqueue.taskqueue import Queue
        self.Queue = Queue  # pylint: disable-msg=C0103
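
For orientation, a hedged construction sketch (the State instance, machine name, queue, and url are hypothetical, and the fantasm/App Engine imports are assumed to match the snippet above):

    # Hypothetical usage sketch, not part of the library. pseudoInitState is
    # assumed to be a State whose name is FSM.PSEUDO_INIT, as initialize()
    # requires.
    context = FSMContext(pseudoInitState,
                         currentState=pseudoInitState,
                         machineName='MyMachine',
                         queueName='default',
                         url='/fantasm/fsm/MyMachine/',
                         method='POST')
    event = context.initialize()  # queues the pseudo-init Task, returns FSM.PSEUDO_INIT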
Example #2
    def __init__(self, initialState, currentState=None, machineName=None, instanceName=None,
                 retryOptions=None, url=None, queueName=None, data=None, contextTypes=None,
                 method='GET', persistentLogging=False, obj=None, headers=None, globalTaskTarget=None,
                 useRunOnceSemaphore=True):
        """ Constructor

        @param initialState: a State instance
        @param currentState: a State instance
        @param machineName: the name of the fsm
        @param instanceName: the instance name of the fsm
        @param retryOptions: the TaskRetryOptions for the machine
        @param url: the url of the fsm
        @param queueName: the name of the appengine task queue
        @param headers: a dict of X-Fantasm request headers to pass along in Tasks
        @param persistentLogging: if True, use persistent _FantasmLog model
        @param obj: an object that the FSMContext can operate on
        @param globalTaskTarget: the machine-level target configuration parameter
        @param useRunOnceSemaphore: if True (the default), guard fan-in work packages with a run-once semaphore
        """
        assert queueName

        super(FSMContext, self).__init__(data or {})
        self.initialState = initialState
        self.currentState = currentState
        self.currentAction = None
        if currentState:
            self.currentAction = currentState.exitAction
        self.machineName = machineName
        self.instanceName = instanceName or self._generateUniqueInstanceName()
        self.queueName = queueName
        self.retryOptions = retryOptions
        self.url = url
        self.method = method
        self.startingEvent = None
        self.startingState = None
        self.contextTypes = constants.PARAM_TYPES.copy()
        if contextTypes:
            self.contextTypes.update(contextTypes)
        self.logger = Logger(self, obj=obj, persistentLogging=persistentLogging)
        self.__obj = obj
        self.headers = headers
        self.globalTaskTarget = globalTaskTarget
        self.useRunOnceSemaphore = useRunOnceSemaphore

        # the following is monkey-patched from handler.py for 'immediate mode'
        from google.appengine.api.taskqueue.taskqueue import Queue
        self.Queue = Queue # pylint: disable-msg=C0103
Example #3
class FSMContext(dict):
    """ A finite state machine context instance. """
    
    def __init__(self, initialState, currentState=None, machineName=None, instanceName=None,
                 retryOptions=None, url=None, queueName=None, data=None, contextTypes=None,
                 method='GET', persistentLogging=False, obj=None, headers=None):
        """ Constructor
        
        @param initialState: a State instance 
        @param currentState: a State instance
        @param machineName: the name of the fsm
        @param instanceName: the instance name of the fsm
        @param retryOptions: the TaskRetryOptions for the machine
        @param url: the url of the fsm  
        @param queueName: the name of the appengine task queue 
        @param headers: a dict of X-Fantasm request headers to pass along in Tasks 
        @param persistentLogging: if True, use persistent _FantasmLog model
        @param obj: an object that the FSMContext can operate on  
        """
        assert queueName
        
        super(FSMContext, self).__init__(data or {})
        self.initialState = initialState
        self.currentState = currentState
        self.currentAction = None
        if currentState:
            self.currentAction = currentState.exitAction 
        self.machineName = machineName
        self.instanceName = instanceName or self._generateUniqueInstanceName()
        self.queueName = queueName
        self.retryOptions = retryOptions
        self.url = url
        self.method = method
        self.startingEvent = None
        self.startingState = None
        self.contextTypes = constants.PARAM_TYPES.copy()
        if contextTypes:
            self.contextTypes.update(contextTypes)
        self.logger = Logger(self, obj=obj, persistentLogging=persistentLogging)
        self.__obj = obj
        self.headers = headers
        
        # the following is monkey-patched from handler.py for 'immediate mode'
        from google.appengine.api.taskqueue.taskqueue import Queue
        self.Queue = Queue # pylint: disable-msg=C0103
        
    def _generateUniqueInstanceName(self):
        """ Generates a unique instance name for this machine. 
        
        @return: a FSMContext instanceName that is (pretty darn likely to be) unique
        """
        utcnow = datetime.datetime.utcnow()
        dateStr = utcnow.strftime('%Y%m%d%H%M%S')
        randomStr = ''.join(random.sample(constants.CHARS_FOR_RANDOM, 6))
        return '%s-%s-%s' % (self.machineName, dateStr, randomStr)
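
    # Illustration (machineName hypothetical): the generated name has the shape
    # '<machineName>-<UTC timestamp>-<6 random chars>', e.g.
    #     'MyMachine-20240101123045-aB3xQ9'
    # where random.sample() draws six distinct characters from
    # constants.CHARS_FOR_RANDOM.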
        
    def putTypedValue(self, key, value):
        """ Sets a value on context[key], but casts the value according to self.contextTypes. """

        # cast the value to the appropriate type. TODO: should this be in FSMContext?
        cast = self.contextTypes[key]
        kwargs = {}
        if cast is simplejson.loads:
            kwargs = {'object_hook': models.decode}
        if isinstance(value, list):
            value = [cast(v, **kwargs) for v in value]
        else:
            value = cast(value, **kwargs)

        # update the context
        self[key] = value
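
    # Illustration of the casting above (the contextTypes entry is hypothetical):
    # with self.contextTypes = {'count': int},
    #     context.putTypedValue('count', '42')        # context['count'] == 42
    #     context.putTypedValue('count', ['1', '2'])  # context['count'] == [1, 2]
    # and when the cast is simplejson.loads, models.decode is passed as the
    # object_hook so encoded values round-trip.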
        
    def generateInitializationTask(self, countdown=0, taskName=None):
        """ Generates a task for initializing the machine. """
        assert self.currentState.name == FSM.PSEUDO_INIT
        
        url = self.buildUrl(self.currentState, FSM.PSEUDO_INIT)
        params = self.buildParams(self.currentState, FSM.PSEUDO_INIT)
        taskName = taskName or self.getTaskName(FSM.PSEUDO_INIT)
        transition = self.currentState.getTransition(FSM.PSEUDO_INIT)
        task = Task(name=taskName, 
                    method=self.method, 
                    url=url, 
                    params=params, 
                    countdown=countdown, 
                    headers=self.headers, 
                    retry_options=transition.retryOptions)
        return task
    
    def fork(self, data=None):
        """ Forks the FSMContext. 
        
        When an FSMContext is forked, an identical copy of the finite state machine is generated
        that will have the same event dispatched to it as the machine that called .fork(). The data
        parameter is useful for allowing each forked instance to operate on a different bit of data.
        
        @param data: an optional mapping of data to apply to the forked FSMContext
        """
        obj = self.__obj
        if obj.get(constants.FORKED_CONTEXTS_PARAM) is None:
            obj[constants.FORKED_CONTEXTS_PARAM] = []
        forkedContexts = obj.get(constants.FORKED_CONTEXTS_PARAM)
        data = copy.copy(data) or {}
        data[constants.FORK_PARAM] = len(forkedContexts)
        forkedContexts.append(self.clone(updateData=data))
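
    # Hedged usage sketch (the action class and keys are hypothetical): inside
    # a state's action, fork() is typically called once per work item so each
    # forked context carries its own datum plus a FORK_PARAM index:
    #
    #     class SplitWorkAction(object):
    #         def execute(self, context, obj):
    #             for item in context.get('items', []):
    #                 context.fork(data={'item': item})
    #             return 'ok'  # the same event is dispatched to every fork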
    
    def spawn(self, machineName, contexts, countdown=0, method='POST', 
              _currentConfig=None, taskName=None):
        """ Spawns new machines.
        
        @param machineName: the machine to spawn
        @param contexts: a list of contexts (dictionaries) to spawn the new machine(s) with; multiple contexts
                        will spawn multiple machines
        @param countdown: the countdown (in seconds) to wait before spawning machines
        @param method: the method ('GET' or 'POST') to invoke the machine with (default: POST)
        @param _currentConfig: test injection for configuration
        @param taskName: used for idempotency; will become the root of the task name for the actual task queued
        """
        # using the current task name as a root to startStateMachine will make this idempotent
        taskName = taskName or self.__obj[constants.TASK_NAME_PARAM]
        startStateMachine(machineName, contexts, taskName=taskName, method=method, countdown=countdown, 
                          _currentConfig=_currentConfig, headers=self.headers)
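
    # Hedged usage sketch (machine name and contexts hypothetical): unlike
    # fork(), spawn() starts independent machines with their own instances:
    #
    #     context.spawn('Mailer',
    #                   [{'to': 'a@example.com'}, {'to': 'b@example.com'}],
    #                   countdown=60)  # two 'Mailer' machines, 60s from now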
    
    def initialize(self):
        """ Initializes the FSMContext. Queues a Task (so that we can benefit from auto-retry) to dispatch
        an event and take the machine from 'pseudo-init' into the state machine's initial state, as 
        defined in the fsm.yaml file.
        
        @return: an event string to dispatch to the FSMContext to put it into the initialState 
        """
        self[constants.STEPS_PARAM] = 0
        task = self.generateInitializationTask()
        self.Queue(name=self.queueName).add(task)
        _FantasmInstance(key_name=self.instanceName, instanceName=self.instanceName).put()
        
        return FSM.PSEUDO_INIT
        
    def dispatch(self, event, obj):
        """ The main entry point to move the machine according to an event. 
        
        @param event: a string event to dispatch to the FSMContext
        @param obj: an object that the FSMContext can operate on  
        @return: an event string to dispatch to the FSMContext
        """
        
        self.__obj = self.__obj or obj # hold the obj object for use during this context

        # store the starting state and event for the handleEvent() method
        self.startingState = self.currentState
        self.startingEvent = event

        nextEvent = None
        try:
            nextEvent = self.currentState.dispatch(self, event, obj)
            
            if obj.get(constants.FORKED_CONTEXTS_PARAM):
                # pylint: disable-msg=W0212
                # - accessing the protected method is fine here, since it is an instance of the same class
                tasks = []
                for context in obj[constants.FORKED_CONTEXTS_PARAM]:
                    context[constants.STEPS_PARAM] = int(context.get(constants.STEPS_PARAM, '0')) + 1
                    task = context.queueDispatch(nextEvent, queue=False)
                    if task: # fan-in magic
                        if not task.was_enqueued: # fan-in always queues
                            tasks.append(task)
                
                try:
                    if tasks:
                        transition = self.currentState.getTransition(nextEvent)
                        _queueTasks(self.Queue, transition.queueName, tasks)
                
                except (TaskAlreadyExistsError, TombstonedTaskError):
                    # unlike a similar block in self.continuation, this is well off the happy path
                    self.logger.critical(
                                     'Unable to queue fork Tasks %s as it/they already exists. (Machine %s, State %s)',
                                     [task.name for task in tasks if not task.was_enqueued],
                                     self.machineName, 
                                     self.currentState.name)
                
            if nextEvent:
                self[constants.STEPS_PARAM] = int(self.get(constants.STEPS_PARAM, '0')) + 1
                
                try:
                    self.queueDispatch(nextEvent)
                    
                except (TaskAlreadyExistsError, TombstonedTaskError):
                    # unlike a similar block in self.continuation, this is well off the happy path
                    #
                    # FIXME: when this happens, it means there was a failure shortly after queuing the Task, or
                    #        possibly even with queuing the Task. when this happens there is a chance that
                    #        two states in the machine are executing simultaneously, which may or may not
                    #        be a good thing, depending on what each state does. gracefully handling this
                    #        exception at least means that this state will terminate.
                    self.logger.critical('Unable to queue next Task as it already exists. (Machine %s, State %s)',
                                     self.machineName, 
                                     self.currentState.name)
                    
            else:
                # if we're not in a final state, emit a log message
                # FIXME - somehow we should avoid this message if we're in the "last" step of a continuation...
                if not self.currentState.isFinalState and not obj.get(constants.TERMINATED_PARAM):
                    self.logger.critical('Non-final state did not emit an event. Machine has terminated in an ' +
                                     'unknown state. (Machine %s, State %s)' %
                                     (self.machineName, self.currentState.name))
                # if it is a final state, then dispatch the pseudo-final event to finalize the state machine
                elif self.currentState.isFinalState and self.currentState.exitAction:
                    self[constants.STEPS_PARAM] = int(self.get(constants.STEPS_PARAM, '0')) + 1
                    self.queueDispatch(FSM.PSEUDO_FINAL)
                    
        except Exception:
            self.logger.exception("FSMContext.dispatch is handling the following exception:")
            self._handleException(event, obj)
            
        return nextEvent
    
    def continuation(self, nextToken):
        """ Performs a continuation be re-queueing an FSMContext Task with a slightly modified continuation
        token. self.startingState and self.startingEvent are used in the re-queue, so this can be seen as a
        'fork' of the current context.
        
        @param nextToken: the next continuation token
        """
        assert not self.get(constants.INDEX_PARAM) # fan-out after fan-in is not allowed
        step = str(self[constants.STEPS_PARAM]) # needs to be a str key into a json dict
        
        # make a copy and set the currentState to the startingState of this context
        context = self.clone()
        context.currentState = self.startingState
        
        # update the generation and continuation params
        gen = context.get(constants.GEN_PARAM, {})
        gen[step] = gen.get(step, 0) + 1
        context[constants.GEN_PARAM] = gen
        context[constants.CONTINUATION_PARAM] = nextToken
        
        try:
            # pylint: disable-msg=W0212
            # - accessing the protected method is fine here, since it is an instance of the same class
            transition = self.startingState.getTransition(self.startingEvent)
            context._queueDispatchNormal(self.startingEvent, queue=True, queueName=transition.queueName,
                                         retryOptions=transition.retryOptions)
            
        except (TaskAlreadyExistsError, TombstonedTaskError):
            # this can happen when currentState.dispatch() previously succeeded in queueing the continuation
            # Task, but failed with the doAction.execute() call in a _previous_ execution of this Task.
            # NOTE: this prevents the dreaded "fork bomb"
            self.logger.info('Unable to queue continuation Task as it already exists. (Machine %s, State %s)',
                          self.machineName, 
                          self.currentState.name)
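
    # Illustration (hypothetical values): with STEPS_PARAM == 2 and two prior
    # continuations at this step, the clone above is re-queued with
    #     context[constants.GEN_PARAM] == {'2': 3}
    # which getTaskName() below turns into a 'continuation-2-3' part of the
    # task name, keeping each continuation Task uniquely named.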
    
    def queueDispatch(self, nextEvent, queue=True):
        """ Queues a .dispatch(nextEvent) call in the appengine Task queue. 
        
        @param nextEvent: a string event 
        @param queue: a boolean indicating whether or not to queue a Task, or leave it to the caller 
        @return: a taskqueue.Task instance which may or may not have been queued already
        """
        assert nextEvent is not None
        
        # self.currentState is already transitioned away from self.startingState
        transition = self.currentState.getTransition(nextEvent)
        queueName = self.__obj.get(constants.QUEUE_NAME_PARAM) or transition.queueName
        if transition.target.isFanIn:
            task = self._queueDispatchFanIn(nextEvent, fanInPeriod=transition.target.fanInPeriod,
                                            retryOptions=transition.retryOptions,
                                            queueName=queueName)
        else:
            task = self._queueDispatchNormal(nextEvent, queue=queue, countdown=transition.countdown,
                                             retryOptions=transition.retryOptions,
                                             queueName=queueName)
            
        return task
        
    def _queueDispatchNormal(self, nextEvent, queue=True, countdown=0, retryOptions=None, queueName=None):
        """ Queues a call to .dispatch(nextEvent) in the appengine Task queue. 
        
        @param nextEvent: a string event 
        @param queue: a boolean indicating whether or not to queue a Task, or leave it to the caller 
        @param countdown: the number of seconds to countdown before the queued task fires
        @param retryOptions: the RetryOptions for the task
        @param queueName: the queue name to Queue into 
        @return: a taskqueue.Task instance which may or may not have been queued already
        """
        assert nextEvent is not None
        assert queueName
        
        url = self.buildUrl(self.currentState, nextEvent)
        params = self.buildParams(self.currentState, nextEvent)
        taskName = self.getTaskName(nextEvent)
        
        task = Task(name=taskName, method=self.method, url=url, params=params, countdown=countdown,
                    retry_options=retryOptions, headers=self.headers)
        if queue:
            self.Queue(name=queueName).add(task)
        
        return task
    
    def _queueDispatchFanIn(self, nextEvent, fanInPeriod=0, retryOptions=None, queueName=None):
        """ Queues a call to .dispatch(nextEvent) in the task queue, or saves the context to the 
        datastore for processing by the queued .dispatch(nextEvent)
        
        @param nextEvent: a string event 
        @param fanInPeriod: the period of time between fan in Tasks 
        @param queueName: the queue name to Queue into 
        @return: a taskqueue.Task instance which may or may not have been queued already
        """
        assert nextEvent is not None
        assert not self.get(constants.INDEX_PARAM) # fan-in after fan-in is not allowed
        assert queueName
        
        # we pop this off here because we do not want the fan-out/continuation param as part of the
        # task name, otherwise we lose the fan-in - each fan-in gets one work unit.
        self.pop(constants.GEN_PARAM, None)
        fork = self.pop(constants.FORK_PARAM, None)
        
        # transfer the fan-in-group into the context (under a fixed value key) so that states beyond 
        # the fan-in get unique Task names
        # FIXME: this will likely change once we formalize what to do post fan-in
        transition = self.currentState.getTransition(nextEvent)
        if self.get(transition.target.fanInGroup) is not None:
            self[constants.FAN_IN_GROUP_PARAM] = self[transition.target.fanInGroup]
        
        taskNameBase = self.getTaskName(nextEvent, fanIn=True)
        rwlock = ReadWriteLock(taskNameBase, self)
        index = rwlock.currentIndex()
            
        # (***)
        #
        # grab the lock - memcache.incr()
        # 
        # on Task retry, multiple incr() calls are possible. possible ways to handle:
        #
        # 1. release the lock in a 'finally' clause, but then risk missing a work
        #    package because acquiring the read lock will succeed even though the
        #    work package was not written yet.
        #
        # 2. allow the lock to get too high. the fan-in logic attempts to wait for 
        #    work packages across multiple-retry attempts, so this seems like the 
        #    best option. we basically trade a bit of latency in fan-in for reliability.
        #    
        rwlock.acquireWriteLock(index, nextEvent=nextEvent)
        
        # insert the work package, which is simply a serialized FSMContext
        workIndex = '%s-%d' % (taskNameBase, knuthHash(index))
        
        # on retry, we want to ensure we get the same work index for this task
        actualTaskName = self.__obj[constants.TASK_NAME_PARAM]
        indexKeyName = 'workIndex-' + '-'.join([str(i) for i in [actualTaskName, fork] if i]) or None
        semaphore = RunOnceSemaphore(indexKeyName, self)
        
        # check if the workIndex changed during retry
        semaphoreWritten = False
        if self.__obj[constants.RETRY_COUNT_PARAM] > 0:
            # see comment (A) in self._queueDispatchFanIn(...)
            time.sleep(constants.DATASTORE_ASYNCRONOUS_INDEX_WRITE_WAIT_TIME)
            payload = semaphore.readRunOnceSemaphore(payload=workIndex, transactional=False)
            if payload:
                semaphoreWritten = True
                if payload != workIndex:
                    self.logger.info("Work index changed from '%s' to '%s' on retry.", payload, workIndex)
                    workIndex = payload
        
        # update this here so it gets written down into the work package too
        self[constants.INDEX_PARAM] = index
                
        # write down two models, one actual work package, one idempotency package
        keyName = '-'.join([str(i) for i in [actualTaskName, fork] if i]) or None
        work = _FantasmFanIn(context=self, workIndex=workIndex, key_name=keyName)
        
        # close enough to idempotent, but could still write only one of the entities
        # FIXME: could be made faster using a bulk put, but this interface is cleaner
        if not semaphoreWritten:
            semaphore.writeRunOnceSemaphore(payload=workIndex, transactional=False)
        
        # put the work item
        db.put(work)
        
        # (A) now the datastore is asynchronously writing the indices, so the work package may
        #     not show up in a query for a period of time. there is a corresponding time.sleep()
        #     in the fan-in of self.mergeJoinDispatch(...) 
            
        # release the lock - memcache.decr()
        rwlock.releaseWriteLock(index)
            
        try:
            
            # insert a task to run in the future and process a bunch of work packages
            now = time.time()
            url = self.buildUrl(self.currentState, nextEvent)
            params = self.buildParams(self.currentState, nextEvent)
            task = Task(name='%s-%d' % (taskNameBase, index),
                        method=self.method,
                        url=url,
                        params=params,
                        eta=datetime.datetime.utcfromtimestamp(now) + datetime.timedelta(seconds=fanInPeriod),
                        headers=self.headers,
                        retry_options=retryOptions)
            self.Queue(name=queueName).add(task)
            return task
        
        except (TaskAlreadyExistsError, TombstonedTaskError):
            pass # Fan-in magic
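
    # Illustration (hypothetical index): with index == 7, every context fanning
    # in during the same window writes its _FantasmFanIn work package under the
    # same workIndex '<taskNameBase>-<knuthHash(7)>', and only one Task named
    # '<taskNameBase>-7' is ever queued; later arrivals hit the
    # TaskAlreadyExistsError above and rely on the already-queued Task.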
                
            
    def mergeJoinDispatch(self, event, obj):
        """ Performs a merge join on the pending fan-in dispatches.
        
        @param event: an event that is being merge joined (destination state must be a fan in) 
        @return: a list (possibly empty) of FSMContext instances
        """
        # this assertion comes from _queueDispatchFanIn - we never want fan-out info in a fan-in context
        assert not self.get(constants.GEN_PARAM)
        assert not self.get(constants.FORK_PARAM)
        
        # the work package index is stored in the url of the Task/FSMContext
        index = self.get(constants.INDEX_PARAM)
        taskNameBase = self.getTaskName(event, fanIn=True)
        
        # see comment (***) in self._queueDispatchFanIn 
        # 
        # in the case of failing to acquire a read lock (due to failed release of write lock)
        # we have decided to keep retrying
        raiseOnFail = False
        if self._getTaskRetryLimit() is not None:
            raiseOnFail = (self._getTaskRetryLimit() > self.__obj[constants.RETRY_COUNT_PARAM])
            
        rwlock = ReadWriteLock(taskNameBase, self)
        rwlock.acquireReadLock(index, raiseOnFail=raiseOnFail)
        
        # and return the FSMContexts list
        class FSMContextList(list):
            """ A list that supports .logger.info(), .logger.warning() etc.for fan-in actions """
            def __init__(self, context, contexts):
                """ setup a self.logger for fan-in actions """
                super(FSMContextList, self).__init__(contexts)
                self.logger = Logger(context)
                self.instanceName = context.instanceName
                
        # see comment (A) in self._queueDispatchFanIn(...)
        time.sleep(constants.DATASTORE_ASYNCRONOUS_INDEX_WRITE_WAIT_TIME)
                
        # the following step ensures that fan-in only ever operates once over a list of data
        # the entity is created in State.dispatch(...) _after_ all the actions have executed
        # successfully
        workIndex = '%s-%d' % (taskNameBase, knuthHash(index))
        if obj[constants.RETRY_COUNT_PARAM] > 0:
            semaphore = RunOnceSemaphore(workIndex, self)
            if semaphore.readRunOnceSemaphore(payload=self.__obj[constants.TASK_NAME_PARAM]):
                self.logger.info("Fan-in idempotency guard for workIndex '%s', not processing any work items.", 
                                 workIndex)
                return FSMContextList(self, []) # don't operate over the data again
            
        # fetch all the work packages in the current group for processing
        query = _FantasmFanIn.all() \
                             .filter('workIndex =', workIndex) \
                             .order('__key__')
                             
        # construct a list of FSMContexts
        contexts = [self.clone(replaceData=r.context) for r in query]
        return FSMContextList(self, contexts)
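
    # Hedged sketch of the consumer side (the action class is hypothetical):
    # a fan-in state's action receives the FSMContextList built above in
    # place of a single context, and may use its .logger like a context:
    #
    #     class SumFanInAction(object):
    #         def execute(self, contexts, obj):
    #             total = sum(c.get('value', 0) for c in contexts)
    #             contexts.logger.info('merged %d contexts', len(contexts))
    #             return 'ok'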
        
    def _getTaskRetryLimit(self):
        """ Method that returns the maximum number of retries for this particular dispatch 
        
        @param obj: an object that the FSMContext can operate on  
        """
        # get task_retry_limit configuration
        try:
            transition = self.startingState.getTransition(self.startingEvent)
            taskRetryLimit = transition.retryOptions.task_retry_limit
        except UnknownEventError:
            # can't find the transition, use the machine-level default
            taskRetryLimit = self.retryOptions.task_retry_limit
        return taskRetryLimit
            
    def _handleException(self, event, obj):
        """ Method for child classes to override to handle exceptions. 
        
        @param event: a string event 
        @param obj: an object that the FSMContext can operate on  
        """
        retryCount = obj.get(constants.RETRY_COUNT_PARAM, 0)
        taskRetryLimit = self._getTaskRetryLimit()
        
        if taskRetryLimit and retryCount >= taskRetryLimit:
            # need to permanently fail
            self.logger.critical('Max-requeues reached. Machine has terminated in an unknown state. ' +
                             '(Machine %s, State %s, Event %s)',
                             self.machineName, self.startingState.name, event, exc_info=True)
            # re-raise, letting App Engine TaskRetryOptions kill the task
            raise
        else:
            # re-raise the exception
            self.logger.warning('Exception occurred processing event. Task will be retried. ' +
                            '(Machine %s, State %s)',
                            self.machineName, self.startingState.name, exc_info=True)
            
            # this line really just allows unit tests to work - the request is really dead at this point
            self.currentState = self.startingState
            
            raise
    
    def buildUrl(self, state, event):
        """ Builds the taskqueue url. 
        
        @param state: the State to dispatch to
        @param event: the event to dispatch
        @return: a url that can be used to build a taskqueue.Task instance to .dispatch(event)
        """
        assert state and event
        return self.url + '%s/%s/%s/' % (state.name, 
                                         event, 
                                         state.getTransition(event).target.name)
    
    def buildParams(self, state, event):
        """ Builds the taskqueue params. 
        
        @param state: the State to dispatch to
        @param event: the event to dispatch
        @return: a dict suitable to use in constructing a url (GET) or using as params (POST)
        """
        assert state and event
        params = {constants.STATE_PARAM: state.name, 
                  constants.EVENT_PARAM: event,
                  constants.INSTANCE_NAME_PARAM: self.instanceName}
        for key, value in self.items():
            if key not in constants.NON_CONTEXT_PARAMS:
                if self.contextTypes.get(key) is simplejson.loads:
                    value = simplejson.dumps(value, cls=models.Encoder)
                if isinstance(value, datetime.datetime):
                    value = str(int(time.mktime(value.utctimetuple())))
                if isinstance(value, dict):
                    # FIXME: should we issue a warning that they should update fsm.yaml?
                    value = simplejson.dumps(value, cls=models.Encoder)
                if isinstance(value, list) and len(value) == 1:
                    key = key + '[]' # used to preserve lists of length=1 - see handler.py for inverse
                params[key] = value
        return params
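
    # Illustration of the 'key[]' convention above (values hypothetical):
    #     {'ids': [7]}    ->  params {'ids[]': [7]}    # marker preserves the list
    #     {'ids': [7, 8]} ->  params {'ids': [7, 8]}   # longer lists need no marker
    # handler.py strips the '[]' suffix and re-wraps the value in a list.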

    def getTaskName(self, nextEvent, instanceName=None, fanIn=False):
        """ Returns a task name that is unique for a specific dispatch 
        
        @param nextEvent: the event to dispatch
        @return: a task name that can be used to build a taskqueue.Task instance to .dispatch(nextEvent)
        """
        transition = self.currentState.getTransition(nextEvent)
        parts = []
        parts.append(instanceName or self.instanceName)
        
        if self.get(constants.GEN_PARAM):
            for (step, gen) in self[constants.GEN_PARAM].items():
                parts.append('continuation-%s-%s' % (step, gen))
        if self.get(constants.FORK_PARAM):
            parts.append('fork-' + str(self[constants.FORK_PARAM]))
        # post-fan-in we need to store the workIndex in the task name to avoid duplicates, since
        # we popped the generation off during fan-in
        # FIXME: maybe not pop the generation in fan-in?
        # FIXME: maybe store this in the instanceName?
        # FIXME: i wish this was easier to get right :-)
        if (not fanIn) and self.get(constants.INDEX_PARAM):
            parts.append('work-index-' + str(self[constants.INDEX_PARAM]))
        parts.append(self.currentState.name)
        parts.append(nextEvent)
        parts.append(transition.target.name)
        parts.append('step-' + str(self[constants.STEPS_PARAM]))
        if self.get(constants.FAN_IN_GROUP_PARAM) is not None:
            parts.append('group-' + str(self[constants.FAN_IN_GROUP_PARAM]))
        return '--'.join(parts)
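
    # Illustration (hypothetical names): for a simple step the '--'-joined task
    # name looks like
    #     'MyMachine-20240101123045-aB3xQ9--state-a--ok--state-b--step-3'
    # with optional 'continuation-', 'fork-' and 'work-index-' parts after the
    # instance name, and an optional 'group-' part at the end.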
    
    def clone(self, instanceName=None, updateData=None, replaceData=None):
        """ Returns a copy of the FSMContext.
        
        @param instanceName: the instance name to optionally apply to the clone
        @param updateData: a dict/mapping of data to optionally apply (.update()) to the clone
        @param replaceData: a dict/mapping of data to optionally apply (.clear()/.update()) to the clone
        @return: a new FSMContext instance
        """
        assert (not updateData) or (not replaceData), "cannot update and replace data at the same time"
        context = copy.deepcopy(self)
        if instanceName:
            context.instanceName = instanceName
        if updateData:
            context.update(updateData)
        if replaceData:
            context.clear()
            context.update(replaceData)
        return context
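
A hedged usage note on clone(): updateData merges keys into the copied context's data, while replaceData swaps the payload out entirely; the key names below are hypothetical.

    # Hypothetical usage sketch, not part of the library:
    branch = context.clone(updateData={'shard': 1})   # existing data plus 'shard'
    fresh = context.clone(replaceData={'shard': 1})   # data becomes exactly {'shard': 1}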
Example #4
class FSMContext(dict):
    """ A finite state machine context instance. """

    def __init__(self, initialState, currentState=None, machineName=None, instanceName=None,
                 retryOptions=None, url=None, queueName=None, data=None, contextTypes=None,
                 method='GET', persistentLogging=False, obj=None, headers=None, globalTaskTarget=None,
                 useRunOnceSemaphore=True):
        """ Constructor

        @param initialState: a State instance
        @param currentState: a State instance
        @param machineName: the name of the fsm
        @param instanceName: the instance name of the fsm
        @param retryOptions: the TaskRetryOptions for the machine
        @param url: the url of the fsm
        @param queueName: the name of the appengine task queue
        @param headers: a dict of X-Fantasm request headers to pass along in Tasks
        @param persistentLogging: if True, use persistent _FantasmLog model
        @param obj: an object that the FSMContext can operate on
        @param globalTaskTarget: the machine-level target configuration parameter
        @param useRunOnceSemaphore: if True (the default), guard fan-in work packages with a run-once semaphore
        """
        assert queueName

        super(FSMContext, self).__init__(data or {})
        self.initialState = initialState
        self.currentState = currentState
        self.currentAction = None
        if currentState:
            self.currentAction = currentState.exitAction
        self.machineName = machineName
        self.instanceName = instanceName or self._generateUniqueInstanceName()
        self.queueName = queueName
        self.retryOptions = retryOptions
        self.url = url
        self.method = method
        self.startingEvent = None
        self.startingState = None
        self.contextTypes = constants.PARAM_TYPES.copy()
        if contextTypes:
            self.contextTypes.update(contextTypes)
        self.logger = Logger(self, obj=obj, persistentLogging=persistentLogging)
        self.__obj = obj
        self.headers = headers
        self.globalTaskTarget = globalTaskTarget
        self.useRunOnceSemaphore = useRunOnceSemaphore

        # the following is monkey-patched from handler.py for 'immediate mode'
        from google.appengine.api.taskqueue.taskqueue import Queue
        self.Queue = Queue # pylint: disable-msg=C0103

    INSTANCE_NAME_DTFORMAT = '%Y%m%d%H%M%S'

    def _generateUniqueInstanceName(self):
        """ Generates a unique instance name for this machine.

        @return: a FSMContext instanceName that is (pretty darn likely to be) unique
        """
        utcnow = datetime.datetime.utcnow()
        dateStr = utcnow.strftime(self.INSTANCE_NAME_DTFORMAT)
        randomStr = ''.join(random.sample(constants.CHARS_FOR_RANDOM, 6))
        # note this construction is important for getInstanceStartTime()
        return '%s-%s-%s' % (self.machineName, dateStr, randomStr)

    def getInstanceStartTime(self):
        """ Returns the UTC datetime when the instance was started.

        @return a UTC datetime representing when the instance was started.
        """
        startDatetimeString = self.instanceName.rsplit('-')[-2]
        startDatetime = datetime.datetime.strptime(startDatetimeString, self.INSTANCE_NAME_DTFORMAT)
        return startDatetime
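
    # Standalone illustration of the parsing above (instance name hypothetical):
    # rsplit('-') splits on every dash, so [-2] is the timestamp token even if
    # the machine name itself contains dashes:
    #     'MyMachine-20240101123045-aB3xQ9'.rsplit('-')[-2] == '20240101123045'
    #     datetime.datetime.strptime('20240101123045', INSTANCE_NAME_DTFORMAT)
    #         == datetime.datetime(2024, 1, 1, 12, 30, 45)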

    def putTypedValue(self, key, value):
        """ Sets a value on context[key], but casts the value according to self.contextTypes. """

        # cast the value to the appropriate type. TODO: should this be in FSMContext?
        cast = self.contextTypes[key]
        kwargs = {}
        if cast is json.loads:
            kwargs = {'object_hook': models.decode}
        if cast is pickle.loads:
            value = pickle.loads(str(value))
        elif isinstance(value, list):
            value = [cast(v, **kwargs) for v in value]
        else:
            value = cast(value, **kwargs)

        # update the context
        self[key] = value

    def generateInitializationTask(self, countdown=0, taskName=None, transactional=False):
        """ Generates a task for initializing the machine. """
        assert self.currentState.name == FSM.PSEUDO_INIT

        url = self.buildUrl(self.currentState, FSM.PSEUDO_INIT)
        params = self.buildParams(self.currentState, FSM.PSEUDO_INIT)
        if transactional:
            taskName = None
        else:
            taskName = taskName or self.getTaskName(FSM.PSEUDO_INIT)
        transition = self.currentState.getTransition(FSM.PSEUDO_INIT)
        task = Task(name=taskName,
                    method=self.method,
                    url=url,
                    params=params,
                    countdown=countdown,
                    headers=self.headers,
                    retry_options=transition.retryOptions,
                    target=self.globalTaskTarget)
        return task

    def fork(self, data=None):
        """ Forks the FSMContext.

        When an FSMContext is forked, an identical copy of the finite state machine is generated
        that will have the same event dispatched to it as the machine that called .fork(). The data
        parameter is useful for allowing each forked instance to operate on a different bit of data.

        @param data: an optional mapping of data to apply to the forked FSMContext
        """
        obj = self.__obj
        if obj.get(constants.FORKED_CONTEXTS_PARAM) is None:
            obj[constants.FORKED_CONTEXTS_PARAM] = []
        forkedContexts = obj.get(constants.FORKED_CONTEXTS_PARAM)
        data = copy.copy(data) or {}
        data[constants.FORK_PARAM] = len(forkedContexts)
        forkedContexts.append(self.clone(updateData=data))

    def spawn(self, machineName, contexts, countdown=0, method='POST',
              _currentConfig=None, taskName=None):
        """ Spawns new machines.

        @param machineName: the machine to spawn
        @param contexts: a list of contexts (dictionaries) to spawn the new machine(s) with; multiple contexts
                        will spawn multiple machines
        @param countdown: the countdown (in seconds) to wait before spawning machines
        @param method: the method ('GET' or 'POST') to invoke the machine with (default: POST)
        @param _currentConfig: test injection for configuration
        @param taskName: used for idempotency; will become the root of the task name for the actual task queued
        """
        # using the current task name as a root to startStateMachine will make this idempotent
        taskName = taskName or self.__obj[constants.TASK_NAME_PARAM]
        startStateMachine(machineName, contexts, taskName=taskName, method=method, countdown=countdown,
                          _currentConfig=_currentConfig, headers=self.headers)

    def initialize(self):
        """ Initializes the FSMContext. Queues a Task (so that we can benefit from auto-retry) to dispatch
        an event and take the machine from 'pseudo-init' into the state machine's initial state, as
        defined in the fsm.yaml file.

        @return: an event string to dispatch to the FSMContext to put it into the initialState
        """
        self[constants.STEPS_PARAM] = 0
        task = self.generateInitializationTask()
        self.Queue(name=self.queueName).add(task)
        key = db.Key.from_path(_FantasmInstance.kind(), self.instanceName, namespace='')
        _FantasmInstance(key=key, instanceName=self.instanceName).put()

        return FSM.PSEUDO_INIT

    def dispatch(self, event, obj):
        """ The main entry point to move the machine according to an event.

        @param event: a string event to dispatch to the FSMContext
        @param obj: an object that the FSMContext can operate on
        @return: an event string to dispatch to the FSMContext
        """

        self.__obj = self.__obj or obj # hold the obj object for use during this context

        # store the starting state and event for the handleEvent() method
        self.startingState = self.currentState
        self.startingEvent = event

        nextEvent = None
        try:
            nextEvent = self.currentState.dispatch(self, event, obj)

            if obj.get(constants.FORKED_CONTEXTS_PARAM):
                # pylint: disable-msg=W0212
                # - accessing the protected method is fine here, since it is an instance of the same class
                tasks = []
                for context in obj[constants.FORKED_CONTEXTS_PARAM]:
                    context[constants.STEPS_PARAM] = int(context.get(constants.STEPS_PARAM, '0')) + 1
                    task = context.queueDispatch(nextEvent, queue=False)
                    if task: # fan-in magic
                        if not task.was_enqueued: # fan-in always queues
                            tasks.append(task)

                try:
                    if tasks:
                        transition = self.currentState.getTransition(nextEvent)
                        _queueTasks(self.Queue, transition.queueName, tasks)

                except (TaskAlreadyExistsError, TombstonedTaskError):
                    # unlike a similar block in self.continuation, this is well off the happy path
                    self.logger.critical(
                                     'Unable to queue fork Tasks %s as it/they already exists. (Machine %s, State %s)',
                                     [task.name for task in tasks if not task.was_enqueued],
                                     self.machineName,
                                     self.currentState.name)

            if nextEvent:
                self[constants.STEPS_PARAM] = int(self.get(constants.STEPS_PARAM, '0')) + 1

                try:
                    self.queueDispatch(nextEvent)

                except (TaskAlreadyExistsError, TombstonedTaskError):
                    # unlike a similar block in self.continuation, this is well off the happy path
                    #
                    # FIXME: when this happens, it means there was a failure shortly after queuing the Task, or
                    #        possibly even with queuing the Task. when this happens there is a chance that
                    #        two states in the machine are executing simultaneously, which may or may not
                    #        be a good thing, depending on what each state does. gracefully handling this
                    #        exception at least means that this state will terminate.
                    # NOTE:  This happens most often due to oddities in the Task Queue system. E.g., the task
                    #        queue will raise an exception while doing a BulkAdd, or a TransientError occurs;
                    #        however, despite this error, the task was enqueued (and the task name was
                    #        registered in the tombstone system). That is, it seems that most often, the
                    #        error that leads to this message does not mean that a particular machine did
                    #        not continue; it probably always does continue. However, note that the particular
                    #        state that issues this warning was triggered twice, so any side-effect
                    #        (e.g., sending an email) would have occurred twice. Remember it is very important
                    #        for your machine states to be idempotent, meaning they have to protect against
                    #        this situation on their own, as the task queue system itself is distributed and
                    #        definitely not perfect.
                    self.logger.warning('Unable to queue next Task as it already exists. (Machine %s, State %s)',
                                     self.machineName,
                                     self.currentState.name)

            else:
                # if we're not in a final state, emit a log message
                # FIXME - somehow we should avoid this message if we're in the "last" step of a continuation...
                if not self.currentState.isFinalState and not obj.get(constants.TERMINATED_PARAM):
                    self.logger.critical('Non-final state did not emit an event. Machine has terminated in an ' +
                                     'unknown state. (Machine %s, State %s)' %
                                     (self.machineName, self.currentState.name))
                # if it is a final state, then dispatch the pseudo-final event to finalize the state machine
                elif self.currentState.isFinalState and self.currentState.exitAction:
                    self[constants.STEPS_PARAM] = int(self.get(constants.STEPS_PARAM, '0')) + 1
                    self.queueDispatch(FSM.PSEUDO_FINAL)

        except HaltMachineError, e:
            if e.level is not None and e.message:
                self.logger.log(e.level, e.message)
            return None # stop the machine
        except Exception, e:
            level = self.logger.error
            if e.__class__ in TRANSIENT_ERRORS:
                level = self.logger.warn
            level("FSMContext.dispatch is handling the following exception:", exc_info=True)
            self._handleException(event, obj)
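
A hedged sketch of the HaltMachineError path above: an action can raise it to stop the machine without triggering the retry logic. The constructor signature shown is an assumption based on the handler's use of e.level and e.message:

    # Hypothetical action, not part of the library:
    import logging

    class GuardAction(object):
        def execute(self, context, obj):
            if not context.get('payload'):
                # dispatch() logs at the given level and returns None
                raise HaltMachineError('missing payload, halting', level=logging.WARNING)
            return 'ok'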
Example #5
    def __init__(self, context, contexts):
        """ setup a self.logger for fan-in actions """
        super(FSMContextList, self).__init__(contexts)
        self.logger = Logger(context)
        self.instanceName = context.instanceName
Example #6
class FSMContext(dict):
    """ A finite state machine context instance. """
    def __init__(self,
                 initialState,
                 currentState=None,
                 machineName=None,
                 instanceName=None,
                 retryOptions=None,
                 url=None,
                 queueName=None,
                 data=None,
                 contextTypes=None,
                 method='GET',
                 persistentLogging=False,
                 obj=None,
                 headers=None):
        """ Constructor
        
        @param initialState: a State instance 
        @param currentState: a State instance
        @param machineName: the name of the fsm
        @param instanceName: the instance name of the fsm
        @param retryOptions: the TaskRetryOptions for the machine
        @param url: the url of the fsm  
        @param queueName: the name of the appengine task queue 
        @param headers: a dict of X-Fantasm request headers to pass along in Tasks 
        @param persistentLogging: if True, use persistent _FantasmLog model
        @param obj: an object that the FSMContext can operate on  
        """
        assert queueName

        super(FSMContext, self).__init__(data or {})
        self.initialState = initialState
        self.currentState = currentState
        self.currentAction = None
        if currentState:
            self.currentAction = currentState.exitAction
        self.machineName = machineName
        self.instanceName = instanceName or self._generateUniqueInstanceName()
        self.queueName = queueName
        self.retryOptions = retryOptions
        self.url = url
        self.method = method
        self.startingEvent = None
        self.startingState = None
        self.contextTypes = constants.PARAM_TYPES.copy()
        if contextTypes:
            self.contextTypes.update(contextTypes)
        self.logger = Logger(self,
                             obj=obj,
                             persistentLogging=persistentLogging)
        self.__obj = obj
        self.headers = headers

        # the following is monkey-patched from handler.py for 'immediate mode'
        from google.appengine.api.taskqueue.taskqueue import Queue
        self.Queue = Queue  # pylint: disable-msg=C0103

    def _generateUniqueInstanceName(self):
        """ Generates a unique instance name for this machine. 
        
        @return: a FSMContext instanceName that is (pretty darn likely to be) unique
        """
        utcnow = datetime.datetime.utcnow()
        dateStr = utcnow.strftime('%Y%m%d%H%M%S')
        randomStr = ''.join(random.sample(constants.CHARS_FOR_RANDOM, 6))
        return '%s-%s-%s' % (self.machineName, dateStr, randomStr)

    def putTypedValue(self, key, value):
        """ Sets a value on context[key], but casts the value according to self.contextTypes. """

        # cast the value to the appropriate type. TODO: should this be in FSMContext?
        cast = self.contextTypes[key]
        kwargs = {}
        if cast is simplejson.loads:
            kwargs = {'object_hook': models.decode}
        if isinstance(value, list):
            value = [cast(v, **kwargs) for v in value]
        else:
            value = cast(value, **kwargs)

        # update the context
        self[key] = value

    def generateInitializationTask(self, countdown=0, taskName=None):
        """ Generates a task for initializing the machine. """
        assert self.currentState.name == FSM.PSEUDO_INIT

        url = self.buildUrl(self.currentState, FSM.PSEUDO_INIT)
        params = self.buildParams(self.currentState, FSM.PSEUDO_INIT)
        taskName = taskName or self.getTaskName(FSM.PSEUDO_INIT)
        transition = self.currentState.getTransition(FSM.PSEUDO_INIT)
        task = Task(name=taskName,
                    method=self.method,
                    url=url,
                    params=params,
                    countdown=countdown,
                    headers=self.headers,
                    retry_options=transition.retryOptions)
        return task

    def fork(self, data=None):
        """ Forks the FSMContext. 
        
        When an FSMContext is forked, an identical copy of the finite state machine is generated
        that will have the same event dispatched to it as the machine that called .fork(). The data
        parameter is useful for allowing each forked instance to operate on a different bit of data.
        
        @param data: an optional mapping of data to apply to the forked FSMContext
        """
        obj = self.__obj
        if obj.get(constants.FORKED_CONTEXTS_PARAM) is None:
            obj[constants.FORKED_CONTEXTS_PARAM] = []
        forkedContexts = obj.get(constants.FORKED_CONTEXTS_PARAM)
        data = copy.copy(data) or {}
        data[constants.FORK_PARAM] = len(forkedContexts)
        forkedContexts.append(self.clone(data=data))

    def spawn(self,
              machineName,
              contexts,
              countdown=0,
              method='POST',
              _currentConfig=None):
        """ Spawns new machines.
        
        @param machineName: the machine to spawn
        @param contexts: a list of contexts (dictionaries) to spawn the new machine(s) with; multiple contexts
                        will spawn multiple machines
        @param countdown: the countdown (in seconds) to wait before spawning machines
        @param method: the method ('GET' or 'POST') to invoke the machine with (default: POST)
        @param _currentConfig: test injection for configuration
        """
        # using the current task name as a root to startStateMachine will make this idempotent
        taskName = self.__obj[constants.TASK_NAME_PARAM]
        startStateMachine(machineName,
                          contexts,
                          taskName=taskName,
                          method=method,
                          countdown=countdown,
                          _currentConfig=_currentConfig,
                          headers=self.headers)

    def initialize(self):
        """ Initializes the FSMContext. Queues a Task (so that we can benefit from auto-retry) to dispatch
        an event and take the machine from 'pseudo-init' into the state machine's initial state, as 
        defined in the fsm.yaml file.
        
        @return: an event string to dispatch to the FSMContext to put it into the initialState 
        """
        self[constants.STEPS_PARAM] = 0
        task = self.generateInitializationTask()
        self.Queue(name=self.queueName).add(task)
        _FantasmInstance(key_name=self.instanceName,
                         instanceName=self.instanceName).put()

        return FSM.PSEUDO_INIT

    def dispatch(self, event, obj):
        """ The main entry point to move the machine according to an event. 
        
        @param event: a string event to dispatch to the FSMContext
        @param obj: an object that the FSMContext can operate on  
        @return: an event string to dispatch to the FSMContext
        """

        self.__obj = self.__obj or obj  # hold the obj object for use during this context

        # store the starting state and event for the handleEvent() method
        self.startingState = self.currentState
        self.startingEvent = event

        nextEvent = None
        try:
            nextEvent = self.currentState.dispatch(self, event, obj)

            if obj.get(constants.FORKED_CONTEXTS_PARAM):
                # pylint: disable-msg=W0212
                # - accessing the protected method is fine here, since it is an instance of the same class
                tasks = []
                for context in obj[constants.FORKED_CONTEXTS_PARAM]:
                    context[constants.STEPS_PARAM] = int(
                        context.get(constants.STEPS_PARAM, '0')) + 1
                    task = context.queueDispatch(nextEvent, queue=False)
                    if task:  # fan-in magic
                        if not task.was_enqueued:  # fan-in always queues
                            tasks.append(task)

                try:
                    if tasks:
                        transition = self.currentState.getTransition(nextEvent)
                        _queueTasks(self.Queue, transition.queueName, tasks)

                except (TaskAlreadyExistsError, TombstonedTaskError):
                    # unlike a similar block in self.continuation, this is well off the happy path
                    self.logger.critical(
                        'Unable to queue fork Tasks %s as it/they already exists. (Machine %s, State %s)',
                        [task.name for task in tasks if not task.was_enqueued],
                        self.machineName, self.currentState.name)

            if nextEvent:
                self[constants.STEPS_PARAM] = int(
                    self.get(constants.STEPS_PARAM, '0')) + 1

                try:
                    self.queueDispatch(nextEvent)

                except (TaskAlreadyExistsError, TombstonedTaskError):
                    # unlike a similar block in self.continuation, this is well off the happy path
                    #
                    # FIXME: when this happens, it means there was a failure shortly after queuing the Task, or
                    #        possibly even with queuing the Task. when this happens there is a chance that
                    #        two states in the machine are executing simultaneously, which may or may not
                    #        be a good thing, depending on what each state does. gracefully handling this
                    #        exception at least means that this state will terminate.
                    self.logger.critical(
                        'Unable to queue next Task as it already exists. (Machine %s, State %s)',
                        self.machineName, self.currentState.name)

            else:
                # if we're not in a final state, emit a log message
                # FIXME - somehow we should avoid this message if we're in the "last" step of a continuation...
                if not self.currentState.isFinalState and not obj.get(
                        constants.TERMINATED_PARAM):
                    self.logger.critical(
                        'Non-final state did not emit an event. Machine has terminated in an '
                        + 'unknown state. (Machine %s, State %s)' %
                        (self.machineName, self.currentState.name))
                # if it is a final state, then dispatch the pseudo-final event to finalize the state machine
                elif self.currentState.isFinalState and self.currentState.exitAction:
                    self[constants.STEPS_PARAM] = int(
                        self.get(constants.STEPS_PARAM, '0')) + 1
                    self.queueDispatch(FSM.PSEUDO_FINAL)

        except Exception:
            self.logger.exception(
                "FSMContext.dispatch is handling the following exception:")
            self._handleException(event, obj)

        return nextEvent

    def continuation(self, nextToken):
        """ Performs a continuation be re-queueing an FSMContext Task with a slightly modified continuation
        token. self.startingState and self.startingEvent are used in the re-queue, so this can be seen as a
        'fork' of the current context.
        
        @param nextToken: the next continuation token
        """
        assert not self.get(
            constants.INDEX_PARAM)  # fan-out after fan-in is not allowed
        step = str(self[
            constants.STEPS_PARAM])  # needs to be a str key into a json dict

        # make a copy and set the currentState to the startingState of this context
        context = self.clone()
        context.currentState = self.startingState

        # update the generation and continuation params
        gen = context.get(constants.GEN_PARAM, {})
        gen[step] = gen.get(step, 0) + 1
        context[constants.GEN_PARAM] = gen
        context[constants.CONTINUATION_PARAM] = nextToken

        try:
            # pylint: disable-msg=W0212
            # - accessing the protected method is fine here, since it is an instance of the same class
            transition = self.startingState.getTransition(self.startingEvent)
            context._queueDispatchNormal(self.startingEvent,
                                         queue=True,
                                         queueName=transition.queueName)

        except (TaskAlreadyExistsError, TombstonedTaskError):
            # this can happen when currentState.dispatch() previously succeeded in queueing the continuation
            # Task, but failed with the doAction.execute() call in a _previous_ execution of this Task.
            # NOTE: this prevents the dreaded "fork bomb"
            self.logger.info(
                'Unable to queue continuation Task as it already exists. (Machine %s, State %s)',
                self.machineName, self.currentState.name)
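
    # Illustrative note (added commentary, not original source): with STEPS_PARAM == 2,
    # successive continuation() calls yield GEN_PARAM values {'2': 1}, {'2': 2}, ... which
    # getTaskName() renders as 'continuation-2-1', 'continuation-2-2', ... within the
    # '--'-joined task name, so each continuation slice gets its own idempotent Task name.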

    def queueDispatch(self, nextEvent, queue=True):
        """ Queues a .dispatch(nextEvent) call in the appengine Task queue. 
        
        @param nextEvent: a string event 
        @param queue: a boolean indicating whether or not to queue a Task, or leave it to the caller 
        @return: a taskqueue.Task instance which may or may not have been queued already
        """
        assert nextEvent is not None

        # self.currentState is already transitioned away from self.startingState
        transition = self.currentState.getTransition(nextEvent)
        if transition.target.isFanIn:
            task = self._queueDispatchFanIn(
                nextEvent,
                fanInPeriod=transition.target.fanInPeriod,
                retryOptions=transition.retryOptions,
                queueName=transition.queueName)
        else:
            task = self._queueDispatchNormal(
                nextEvent,
                queue=queue,
                countdown=transition.countdown,
                retryOptions=transition.retryOptions,
                queueName=transition.queueName)

        return task

    def _queueDispatchNormal(self,
                             nextEvent,
                             queue=True,
                             countdown=0,
                             retryOptions=None,
                             queueName=None):
        """ Queues a call to .dispatch(nextEvent) in the appengine Task queue. 
        
        @param nextEvent: a string event 
        @param queue: a boolean indicating whether or not to queue a Task, or leave it to the caller 
        @param countdown: the number of seconds to countdown before the queued task fires
        @param retryOptions: the RetryOptions for the task
        @param queueName: the queue name to Queue into 
        @return: a taskqueue.Task instance which may or may not have been queued already
        """
        assert nextEvent is not None
        assert queueName

        url = self.buildUrl(self.currentState, nextEvent)
        params = self.buildParams(self.currentState, nextEvent)
        taskName = self.getTaskName(nextEvent)

        task = Task(name=taskName,
                    method=self.method,
                    url=url,
                    params=params,
                    countdown=countdown,
                    retry_options=retryOptions,
                    headers=self.headers)
        if queue:
            self.Queue(name=queueName).add(task)

        return task

    def _queueDispatchFanIn(self,
                            nextEvent,
                            fanInPeriod=0,
                            retryOptions=None,
                            queueName=None):
        """ Queues a call to .dispatch(nextEvent) in the task queue, or saves the context to the 
        datastore for processing by the queued .dispatch(nextEvent)
        
        @param nextEvent: a string event 
        @param fanInPeriod: the period of time between fan in Tasks 
        @param retryOptions: the RetryOptions for the task
        @param queueName: the queue name to Queue into 
        @return: a taskqueue.Task instance which may or may not have been queued already
        """
        assert nextEvent is not None
        assert not self.get(
            constants.INDEX_PARAM)  # fan-in after fan-in is not allowed
        assert queueName

        # we pop this off here because we do not want the fan-out/continuation param as part of the
        # task name, otherwise we lose the fan-in - each fan-in gets one work unit.
        self.pop(constants.GEN_PARAM, None)
        fork = self.pop(constants.FORK_PARAM, None)

        taskNameBase = self.getTaskName(nextEvent, fanIn=True)
        rwlock = ReadWriteLock(taskNameBase, self)
        index = rwlock.currentIndex()

        # (***)
        #
        # grab the lock - memcache.incr()
        #
        # on Task retry, multiple incr() calls are possible. possible ways to handle:
        #
        # 1. release the lock in a 'finally' clause, but then risk missing a work
        #    package because acquiring the read lock will succeed even though the
        #    work package was not written yet.
        #
        # 2. allow the lock to get too high. the fan-in logic attempts to wait for
        #    work packages across multiple-retry attempts, so this seems like the
        #    best option. we basically trade a bit of latency in fan-in for reliability.
        #
        rwlock.acquireWriteLock(index, nextEvent=nextEvent)

        # insert the work package, which is simply a serialized FSMContext
        workIndex = '%s-%d' % (taskNameBase, knuthHash(index))

        # on retry, we want to ensure we get the same work index for this task
        actualTaskName = self.__obj[constants.TASK_NAME_PARAM]
        indexKeyName = 'workIndex-' + '-'.join(
            [str(i) for i in [actualTaskName, fork] if i]) or None
        semaphore = RunOnceSemaphore(indexKeyName, self)

        # check if the workIndex changed during retry
        semaphoreWritten = False
        if self.__obj[constants.RETRY_COUNT_PARAM] > 0:
            # see comment (A) in self._queueDispatchFanIn(...)
            time.sleep(constants.DATASTORE_ASYNCRONOUS_INDEX_WRITE_WAIT_TIME)
            payload = semaphore.readRunOnceSemaphore(payload=workIndex,
                                                     transactional=False)
            if payload:
                semaphoreWritten = True
                if payload != workIndex:
                    self.logger.info(
                        "Work index changed from '%s' to '%s' on retry.",
                        payload, workIndex)
                    workIndex = payload

        # write down two models, one actual work package, one idempotency package
        keyName = '-'.join([str(i)
                            for i in [actualTaskName, fork] if i]) or None
        work = _FantasmFanIn(context=self,
                             workIndex=workIndex,
                             key_name=keyName)

        # close enough to idempotent, but could still write only one of the entities
        # FIXME: could be made faster using a bulk put, but this interface is cleaner
        if not semaphoreWritten:
            semaphore.writeRunOnceSemaphore(payload=workIndex,
                                            transactional=False)

        # put the work item
        db.put(work)

        # (A) now the datastore is asynchronously writing the indices, so the work package may
        #     not show up in a query for a period of time. there is a corresponding time.sleep()
        #     in the fan-in of self.mergeJoinDispatch(...)

        # release the lock - memcache.decr()
        rwlock.releaseWriteLock(index)

        try:

            # insert a task to run in the future and process a bunch of work packages
            now = time.time()
            self[constants.INDEX_PARAM] = index
            url = self.buildUrl(self.currentState, nextEvent)
            params = self.buildParams(self.currentState, nextEvent)
            task = Task(name='%s-%d' % (taskNameBase, index),
                        method=self.method,
                        url=url,
                        params=params,
                        eta=datetime.datetime.utcfromtimestamp(now) +
                        datetime.timedelta(seconds=fanInPeriod),
                        headers=self.headers,
                        retry_options=retryOptions)
            self.Queue(name=queueName).add(task)
            return task

        except (TaskAlreadyExistsError, TombstonedTaskError):
            pass  # Fan-in magic
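
    # Write-side summary (added commentary, not original source): each fan-out Task
    # (1) bumps the ReadWriteLock write counter for the current index, (2) persists its
    # context as a _FantasmFanIn work package under workIndex, guarded by a
    # RunOnceSemaphore so retries reuse the same workIndex, (3) releases the write lock,
    # and (4) attempts to enqueue the single fan-in Task '<taskNameBase>-<index>';
    # duplicate attempts collapse via TaskAlreadyExistsError/TombstonedTaskError
    # ("fan-in magic").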

    def mergeJoinDispatch(self, event, obj):
        """ Performs a merge join on the pending fan-in dispatches.
        
        @param event: an event that is being merge joined (destination state must be a fan in) 
        @param obj: an object that the FSMContext can operate on
        @return: a list (possibly empty) of FSMContext instances
        """
        # this assertion comes from _queueDispatchFanIn - we never want fan-out info in a fan-in context
        assert not self.get(constants.GEN_PARAM)
        assert not self.get(constants.FORK_PARAM)

        # the work package index is stored in the url of the Task/FSMContext
        index = self.get(constants.INDEX_PARAM)
        taskNameBase = self.getTaskName(event, fanIn=True)

        # see comment (***) in self._queueDispatchFanIn
        #
        # in the case of failing to acquire a read lock (due to failed release of write lock)
        # we have decided to keep retrying
        raiseOnFail = False
        if self._getTaskRetryLimit() is not None:
            raiseOnFail = (self._getTaskRetryLimit() >
                           self.__obj[constants.RETRY_COUNT_PARAM])

        rwlock = ReadWriteLock(taskNameBase, self)
        rwlock.acquireReadLock(index, raiseOnFail=raiseOnFail)

        # and return the FSMContexts list
        class FSMContextList(list):
            """ A list that supports .logger.info(), .logger.warning() etc.for fan-in actions """
            def __init__(self, context, contexts):
                """ setup a self.logger for fan-in actions """
                super(FSMContextList, self).__init__(contexts)
                self.logger = Logger(context)
                self.instanceName = context.instanceName

        # see comment (A) in self._queueDispatchFanIn(...)
        time.sleep(constants.DATASTORE_ASYNCRONOUS_INDEX_WRITE_WAIT_TIME)

        # the following step ensures that fan-in only ever operates once over a list of data
        # the entity is created in State.dispatch(...) _after_ all the actions have executed
        # successfully
        workIndex = '%s-%d' % (taskNameBase, knuthHash(index))
        if obj[constants.RETRY_COUNT_PARAM] > 0:
            semaphore = RunOnceSemaphore(workIndex, self)
            if semaphore.readRunOnceSemaphore(
                    payload=self.__obj[constants.TASK_NAME_PARAM]):
                self.logger.info(
                    "Fan-in idempotency guard for workIndex '%s', not processing any work items.",
                    workIndex)
                return FSMContextList(self,
                                      [])  # don't operate over the data again

        # fetch all the work packages in the current group for processing
        query = _FantasmFanIn.all() \
                             .filter('workIndex =', workIndex) \
                             .order('__key__')

        # construct a list of FSMContexts
        contexts = [self.clone(data=r.context) for r in query]
        return FSMContextList(self, contexts)
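
    # Read-side summary (added commentary, not original source): the fan-in Task acquires
    # the read lock for its index, sleeps out the datastore's asynchronous index writes
    # (comment (A) above), short-circuits on retry via the RunOnceSemaphore, then queries
    # _FantasmFanIn by workIndex and rehydrates one FSMContext per work package.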

    def _getTaskRetryLimit(self):
        """ Method that returns the maximum number of retries for this particular dispatch 
        
        @return: the task_retry_limit from the current transition, falling back to the machine-level default
        """
        # get task_retry_limit configuration
        try:
            transition = self.startingState.getTransition(self.startingEvent)
            taskRetryLimit = transition.retryOptions.task_retry_limit
        except UnknownEventError:
            # can't find the transition, use the machine-level default
            taskRetryLimit = self.retryOptions.task_retry_limit
        return taskRetryLimit

    def _handleException(self, event, obj):
        """ Method for child classes to override to handle exceptions. 
        
        @param event: a string event 
        @param obj: an object that the FSMContext can operate on  
        """
        retryCount = obj.get(constants.RETRY_COUNT_PARAM, 0)
        taskRetryLimit = self._getTaskRetryLimit()

        if taskRetryLimit and retryCount >= taskRetryLimit:
            # need to permanently fail
            self.logger.critical(
                'Max-requeues reached. Machine has terminated in an unknown state. '
                + '(Machine %s, State %s, Event %s)',
                self.machineName,
                self.startingState.name,
                event,
                exc_info=True)
            # re-raise, letting App Engine TaskRetryOptions kill the task
            raise
        else:
            # re-raise the exception
            self.logger.warning(
                'Exception occurred processing event. Task will be retried. ' +
                '(Machine %s, State %s)',
                self.machineName,
                self.startingState.name,
                exc_info=True)

            # this line really just allows unit tests to work - the request is really dead at this point
            self.currentState = self.startingState

            raise

    def buildUrl(self, state, event):
        """ Builds the taskqueue url. 
        
        @param state: the State to dispatch to
        @param event: the event to dispatch
        @return: a url that can be used to build a taskqueue.Task instance to .dispatch(event)
        """
        assert state and event
        return self.url + '%s/%s/%s/' % (
            state.name, event, state.getTransition(event).target.name)

    def buildParams(self, state, event):
        """ Builds the taskqueue params. 
        
        @param state: the State to dispatch to
        @param event: the event to dispatch
        @return: a dict suitable to use in constructing a url (GET) or using as params (POST)
        """
        assert state and event
        params = {
            constants.STATE_PARAM: state.name,
            constants.EVENT_PARAM: event,
            constants.INSTANCE_NAME_PARAM: self.instanceName
        }
        for key, value in self.items():
            if key not in constants.NON_CONTEXT_PARAMS:
                if self.contextTypes.get(key) is simplejson.loads:
                    value = simplejson.dumps(value, cls=models.Encoder)
                if isinstance(value, datetime.datetime):
                    value = str(int(time.mktime(value.utctimetuple())))
                if isinstance(value, dict):
                    # FIXME: should we issue a warning that they should update fsm.yaml?
                    value = simplejson.dumps(value, cls=models.Encoder)
                if isinstance(value, list) and len(value) == 1:
                    key = key + '[]'  # used to preserve lists of length=1 - see handler.py for inverse
                params[key] = value
        return params

    def getTaskName(self, nextEvent, instanceName=None, fanIn=False):
        """ Returns a task name that is unique for a specific dispatch 
        
        @param nextEvent: the event to dispatch
        @param instanceName: an optional instance name override used when building the name
        @param fanIn: if True, omit the work-index part so fan-in Tasks share a base name
        @return: a task name that can be used to build a taskqueue.Task instance to .dispatch(nextEvent)
        """
        transition = self.currentState.getTransition(nextEvent)
        parts = []
        parts.append(instanceName or self.instanceName)

        if self.get(constants.GEN_PARAM):
            for (step, gen) in self[constants.GEN_PARAM].items():
                parts.append('continuation-%s-%s' % (step, gen))
        if self.get(constants.FORK_PARAM):
            parts.append('fork-' + str(self[constants.FORK_PARAM]))
        # post-fan-in we need to store the workIndex in the task name to avoid duplicates, since
        # we popped the generation off during fan-in
        # FIXME: maybe not pop the generation in fan-in?
        # FIXME: maybe store this in the instanceName?
        # FIXME: i wish this was easier to get right :-)
        if (not fanIn) and self.get(constants.INDEX_PARAM):
            parts.append('work-index-' + str(self[constants.INDEX_PARAM]))
        parts.append(self.currentState.name)
        parts.append(nextEvent)
        parts.append(transition.target.name)
        parts.append('step-' + str(self[constants.STEPS_PARAM]))
        return '--'.join(parts)
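
    # Example shape (illustrative, not original source): 'instanceName--currentState--event--targetState--step-3'.
    # _queueDispatchFanIn pops the GEN/FORK params and passes fanIn=True (which skips the
    # work-index part), so concurrent fan-out Tasks at the same step share one base name.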

    def clone(self, instanceName=None, data=None):
        """ Returns a copy of the FSMContext.
        
        @param instanceName: the instance name to optionally apply to the clone
        @param data: a dict/mapping of data to optionally apply (.update()) to the clone
        @return: a new FSMContext instance
        """
        context = copy.deepcopy(self)
        if instanceName:
            context.instanceName = instanceName
        if data:
            context.update(data)
        return context
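
# A minimal usage sketch, not part of the original source: a hypothetical Fantasm action
# showing how FSMContext.fork() fans work out. The class name, the 'items' context key and
# the 'ok' event are assumptions for illustration only. Each fork() clones the context and
# stamps FORK_PARAM, and dispatch() then enqueues one idempotently-named Task per clone.
class ItemBatchAction(object):
    """ Hypothetical action demonstrating fork() fan-out. """

    def execute(self, context, obj):
        # one forked clone per work item; each clone carries its own 'item'
        for item in context.get('items', []):
            context.fork(data={'item': item})
        return 'ok'  # the same event is dispatched to every forked clone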
Example #7
class FSMContext(dict):
    """ A finite state machine context instance. """
    
    def __init__(self, initialState, currentState=None, machineName=None, instanceName=None,
                 retryOptions=None, url=None, queueName=None, data=None, contextTypes=None,
                 method='GET', persistentLogging=False, obj=None, headers=None):
        """ Constructor
        
        @param initialState: a State instance 
        @param currentState: a State instance
        @param machineName: the name of the fsm
        @param instanceName: the instance name of the fsm
        @param retryOptions: the TaskRetryOptions for the machine
        @param url: the url of the fsm  
        @param queueName: the name of the appengine task queue 
        @param headers: a dict of X-Fantasm request headers to pass along in Tasks 
        @param persistentLogging: if True, use persistent _FantasmLog model
        @param obj: an object that the FSMContext can operate on  
        """
        assert queueName
        
        super(FSMContext, self).__init__(data or {})
        self.initialState = initialState
        self.currentState = currentState
        self.currentAction = None
        if currentState:
            self.currentAction = currentState.exitAction 
        self.machineName = machineName
        self.instanceName = instanceName or self._generateUniqueInstanceName()
        self.queueName = queueName
        self.retryOptions = retryOptions
        self.url = url
        self.method = method
        self.startingEvent = None
        self.startingState = None
        self.contextTypes = constants.PARAM_TYPES.copy()
        if contextTypes:
            self.contextTypes.update(contextTypes)
        self.logger = Logger(self, obj=obj, persistentLogging=persistentLogging)
        self.__obj = obj
        self.headers = headers
        
        # the following is monkey-patched from handler.py for 'immediate mode'
        from google.appengine.api.taskqueue.taskqueue import Queue
        self.Queue = Queue # pylint: disable-msg=C0103
        
    def _generateUniqueInstanceName(self):
        """ Generates a unique instance name for this machine. 
        
        @return: a FSMContext instanceName that is (pretty darn likely to be) unique
        """
        utcnow = datetime.datetime.utcnow()
        dateStr = utcnow.strftime('%Y%m%d%H%M%S')
        randomStr = ''.join(random.sample(constants.CHARS_FOR_RANDOM, 6))
        return '%s-%s-%s' % (self.machineName, dateStr, randomStr)
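
    # e.g. 'MyMachine-20240102030405-aB3xYz' (illustrative: machineName, a UTC timestamp,
    # and six characters sampled from constants.CHARS_FOR_RANDOM)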
        
    def putTypedValue(self, key, value):
        """ Sets a value on context[key], but casts the value according to self.contextTypes. """

        # cast the value to the appropriate type TODO: should this be in FSMContext?
        cast = self.contextTypes[key]
        kwargs = {}
        if cast is simplejson.loads:
            kwargs = {'object_hook': models.decode}
        if isinstance(value, list):
            value = [cast(v, **kwargs) for v in value]
        else:
            value = cast(value, **kwargs)

        # update the context
        self[key] = value
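
    # Illustrative usage (added commentary, not original source): with
    # contextTypes={'count': int}, putTypedValue('count', '3') stores the int 3, and
    # putTypedValue('count', ['1', '2']) casts element-wise to [1, 2].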
        
    def generateInitializationTask(self, countdown=0, taskName=None):
        """ Generates a task for initializing the machine. """
        assert self.currentState.name == FSM.PSEUDO_INIT
        
        url = self.buildUrl(self.currentState, FSM.PSEUDO_INIT)
        params = self.buildParams(self.currentState, FSM.PSEUDO_INIT)
        taskName = taskName or self.getTaskName(FSM.PSEUDO_INIT)
        task = Task(name=taskName, method=self.method, url=url, params=params, countdown=countdown, 
                    headers=self.headers)
        return task
    
    def fork(self, data=None):
        """ Forks the FSMContext. 
        
        When an FSMContext is forked, an identical copy of the finite state machine is generated
        that will have the same event dispatched to it as the machine that called .fork(). The data
        parameter is useful for allowing each forked instance to operate on a different bit of data.
        
        @param data: an optional mapping of data to apply to the forked FSMContext 
        """
        obj = self.__obj
        if obj.get(constants.FORKED_CONTEXTS_PARAM) is None:
            obj[constants.FORKED_CONTEXTS_PARAM] = []
        forkedContexts = obj.get(constants.FORKED_CONTEXTS_PARAM)
        data = copy.copy(data) or {}
        data[constants.FORK_PARAM] = len(forkedContexts)
        forkedContexts.append(self.clone(data=data))
    
    def spawn(self, machineName, contexts, countdown=0, method='POST', 
              _currentConfig=None):
        """ Spawns new machines.
        
        @param machineName: the machine to spawn
        @param contexts: a list of contexts (dictionaries) to spawn the new machine(s) with; multiple contexts will spawn
                         multiple machines
        @param countdown: the countdown (in seconds) to wait before spawning machines
        @param method: the method ('GET' or 'POST') to invoke the machine with (default: POST)
        
        @param _currentConfig: test injection for configuration
        """
        # using the current task name as a root to startStateMachine will make this idempotent
        taskName = self.__obj[constants.TASK_NAME_PARAM]
        startStateMachine(machineName, contexts, taskName=taskName, method=method, countdown=countdown, 
                          _currentConfig=_currentConfig)
    
    def initialize(self):
        """ Initializes the FSMContext. Queues a Task (so that we can benefit from auto-retry) to dispatch
        an event and take the machine from 'pseudo-init' into the state machine's initial state, as 
        defined in the fsm.yaml file.
        
        @return: an event string to dispatch to the FSMContext to put it into the initialState 
        """
        self[constants.STEPS_PARAM] = 0
        task = self.generateInitializationTask()
        self.Queue(name=self.queueName).add(task)
        _FantasmInstance(key_name=self.instanceName, instanceName=self.instanceName).put()
        
        return FSM.PSEUDO_INIT
        
    def dispatch(self, event, obj):
        """ The main entry point to move the machine according to an event. 
        
        @param event: a string event to dispatch to the FSMContext
        @param obj: an object that the FSMContext can operate on  
        @return: an event string to dispatch to the FSMContext
        """
        
        self.__obj = self.__obj or obj # hold the obj object for use during this context

        # store the starting state and event for the handleEvent() method
        self.startingState = self.currentState
        self.startingEvent = event

        nextEvent = None
        try:
            nextEvent = self.currentState.dispatch(self, event, obj)
            
            if obj.get(constants.FORKED_CONTEXTS_PARAM):
                # pylint: disable-msg=W0212
                # - accessing the protected method is fine here, since it is an instance of the same class
                tasks = []
                for context in obj[constants.FORKED_CONTEXTS_PARAM]:
                    context[constants.STEPS_PARAM] = int(context.get(constants.STEPS_PARAM, '0')) + 1
                    task = context.queueDispatch(nextEvent, queue=False)
                    if task: # fan-in magic
                        if not task.was_enqueued: # fan-in always queues
                            tasks.append(task)
                
                try:
                    if tasks:
                        transition = self.currentState.getTransition(nextEvent)
                        self.Queue(name=transition.queueName).add(tasks)
                
                except (TaskAlreadyExistsError, TombstonedTaskError):
                    # unlike a similar block in self.continuation, this is well off the happy path
                    self.logger.critical(
                                     'Unable to queue fork Tasks %s as they already exist. (Machine %s, State %s)',
                                     [task.name for task in tasks if not task.was_enqueued],
                                     self.machineName, 
                                     self.currentState.name)
                
            if nextEvent:
                self[constants.STEPS_PARAM] = int(self.get(constants.STEPS_PARAM, '0')) + 1
                
                try:
                    self.queueDispatch(nextEvent)
                    
                except (TaskAlreadyExistsError, TombstonedTaskError):
                    # unlike a similar block in self.continuation, this is well off the happy path
                    #
                    # FIXME: when this happens, it means there was a failure shortly after queuing the Task, or
                    #        possibly even with queuing the Task. when this happens there is a chance that 
                    #        two states in the machine are executing simultaneously, which may or may not
                    #        be a good thing, depending on what each state does. gracefully handling this 
                    #        exception at least means that this state will terminate.
                    self.logger.critical('Unable to queue next Task as it already exists. (Machine %s, State %s)',
                                     self.machineName, 
                                     self.currentState.name)
                    
            else:
                # if we're not in a final state, emit a log message
                # FIXME - somehow we should avoid this message if we're in the "last" step of a continuation...
                if not self.currentState.isFinalState and not obj.get(constants.TERMINATED_PARAM):
                    self.logger.critical('Non-final state did not emit an event. Machine has terminated in an ' +
                                     'unknown state. (Machine %s, State %s)' %
                                     (self.machineName, self.currentState.name))
                # if it is a final state, then dispatch the pseudo-final event to finalize the state machine
                elif self.currentState.isFinalState and self.currentState.exitAction:
                    self[constants.STEPS_PARAM] = int(self.get(constants.STEPS_PARAM, '0')) + 1
                    self.queueDispatch(FSM.PSEUDO_FINAL)
                    
        except Exception:
            self.logger.exception("FSMContext.dispatch is handling the following exception:")
            self._handleException(event, obj)
            
        return nextEvent
    
    def continuation(self, nextToken):
        """ Performs a continuation be re-queueing an FSMContext Task with a slightly modified continuation
        token. self.startingState and self.startingEvent are used in the re-queue, so this can be seen as a
        'fork' of the current context.
        
        @param nextToken: the next continuation token
        """
        assert not self.get(constants.INDEX_PARAM) # fan-out after fan-in is not allowed
        step = str(self[constants.STEPS_PARAM]) # needs to be a str key into a json dict
        
        # make a copy and set the currentState to the startingState of this context
        context = self.clone()
        context.currentState = self.startingState
        
        # update the generation and continuation params
        gen = context.get(constants.GEN_PARAM, {})
        gen[step] = gen.get(step, 0) + 1
        context[constants.GEN_PARAM] = gen
        context[constants.CONTINUATION_PARAM] = nextToken
        
        try:
            # pylint: disable-msg=W0212
            # - accessing the protected method is fine here, since it is an instance of the same class
            transition = self.startingState.getTransition(self.startingEvent)
            context._queueDispatchNormal(self.startingEvent, queue=True, queueName=transition.queueName)
            
        except (TaskAlreadyExistsError, TombstonedTaskError):
            # this can happen when currentState.dispatch() previously succeeded in queueing the continuation
            # Task, but failed with the doAction.execute() call in a _previous_ execution of this Task.
            # NOTE: this prevents the dreaded "fork bomb" 
            self.logger.info('Unable to queue continuation Task as it already exists. (Machine %s, State %s)',
                          self.machineName, 
                          self.currentState.name)
    
    def queueDispatch(self, nextEvent, queue=True):
        """ Queues a .dispatch(nextEvent) call in the appengine Task queue. 
        
        @param nextEvent: a string event 
        @param queue: a boolean indicating whether or not to queue a Task, or leave it to the caller 
        @return: a taskqueue.Task instance which may or may not have been queued already
        """
        assert nextEvent is not None
        
        # self.currentState is already transitioned away from self.startingState
        transition = self.currentState.getTransition(nextEvent)
        if transition.target.isFanIn:
            task = self._queueDispatchFanIn(nextEvent, fanInPeriod=transition.target.fanInPeriod,
                                            queueName=transition.queueName)
        else:
            task = self._queueDispatchNormal(nextEvent, queue=queue, countdown=transition.countdown,
                                             retryOptions=transition.retryOptions,
                                             queueName=transition.queueName)
            
        return task
        
    def _queueDispatchNormal(self, nextEvent, queue=True, countdown=0, retryOptions=None, queueName=None):
        """ Queues a call to .dispatch(nextEvent) in the appengine Task queue. 
        
        @param nextEvent: a string event 
        @param queue: a boolean indicating whether or not to queue a Task, or leave it to the caller 
        @param countdown: the number of seconds to countdown before the queued task fires
        @param retryOptions: the RetryOptions for the task
        @param queueName: the queue name to Queue into 
        @return: a taskqueue.Task instance which may or may not have been queued already
        """
        assert nextEvent is not None
        assert queueName
        
        url = self.buildUrl(self.currentState, nextEvent)
        params = self.buildParams(self.currentState, nextEvent)
        taskName = self.getTaskName(nextEvent)
        
        task = Task(name=taskName, method=self.method, url=url, params=params, countdown=countdown,
                    retry_options=retryOptions, headers=self.headers)
        if queue:
            self.Queue(name=queueName).add(task)
        
        return task
    
    def _queueDispatchFanIn(self, nextEvent, fanInPeriod=0, queueName=None):
        """ Queues a call to .dispatch(nextEvent) in the task queue, or saves the context to the 
        datastore for processing by the queued .dispatch(nextEvent)
        
        @param nextEvent: a string event 
        @param fanInPeriod: the period of time between fan in Tasks 
        @param queueName: the queue name to Queue into 
        @return: a taskqueue.Task instance which may or may not have been queued already
        """
        assert nextEvent is not None
        assert not self.get(constants.INDEX_PARAM) # fan-in after fan-in is not allowed
        assert queueName
        
        # we pop this off here because we do not want the fan-out/continuation param as part of the
        # task name, otherwise we lose the fan-in - each fan-in gets one work unit.
        self.pop(constants.GEN_PARAM, None)
        self.pop(constants.FORK_PARAM, None)
        
        taskNameBase = self.getTaskName(nextEvent, fanIn=True)
        index = memcache.get('index-' + taskNameBase)
        if index is None:
            # using 'random.randint' here instead of '1' helps when the index is ejected from memcache
            # instead of restarting at the same counter, we jump (likely) far away from existing task job
            # names. 
            memcache.add('index-' + taskNameBase, random.randint(1, 2**32))
            index = memcache.get('index-' + taskNameBase)
            
        # grab the lock
        lock = '%s-lock-%d' % (taskNameBase, index)
        writers = memcache.incr(lock, initial_value=2**16)
        if writers < 2**16:
            memcache.decr(lock)
            # this will escape as a 500 error and the Task will be re-tried by appengine
            raise FanInWriteLockFailureRuntimeError(nextEvent, 
                                                    self.machineName, 
                                                    self.currentState.name, 
                                                    self.instanceName)
        
        # insert the work package, which is simply a serialized FSMContext
        workIndex = '%s-%d' % (taskNameBase, knuthHash(index))
        work = _FantasmFanIn(context=self, workIndex=workIndex)
        work.put()
        
        # insert a task to run in the future and process a bunch of work packages
        now = time.time()
        try:
            self[constants.INDEX_PARAM] = index
            url = self.buildUrl(self.currentState, nextEvent)
            params = self.buildParams(self.currentState, nextEvent)
            # int(now / (fanInPeriod - 1 + 30)) included because it was in [2], but is less needed now that
            # we use random.randint in seeding memcache. for long fan in periods, and the case where random.randint
            # hits the same value twice, this may cause problems for up to fanInPeriod + 30s.
            # see: http://www.mail-archive.com/[email protected]/msg30408.html
            task = Task(name='%s-%d-%d' % (taskNameBase, int(now / (fanInPeriod - 1 + 30)), index),
                        method=self.method,
                        url=url,
                        params=params,
                        eta=datetime.datetime.utcfromtimestamp(now) + datetime.timedelta(seconds=fanInPeriod),
                        headers=self.headers)
            self.Queue(name=queueName).add(task)
            return task
        except (TaskAlreadyExistsError, TombstonedTaskError):
            pass # Fan-in magic
        finally:
            memcache.decr(lock)
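
    # Lock arithmetic summary (added commentary, not original source): writers incr() the
    # lock from a 2**16 base and decr() it in the finally block; mergeJoinDispatch decr()'s
    # by 2**15 to close the lock, so a late writer's incr() lands below 2**16 and it raises
    # FanInWriteLockFailureRuntimeError, forcing a Task retry against a fresh index.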
            
    def mergeJoinDispatch(self, event, obj):
        """ Performs a merge join on the pending fan-in dispatches.
        
        @param event: an event that is being merge joined (destination state must be a fan in) 
        @param obj: an object that the FSMContext can operate on
        @return: a list (possibly empty) of FSMContext instances
        """
        # this assertion comes from _queueDispatchFanIn - we never want fan-out info in a fan-in context
        assert not self.get(constants.GEN_PARAM)
        assert not self.get(constants.FORK_PARAM)
        
        # the work package index is stored in the url of the Task/FSMContext
        index = self.get(constants.INDEX_PARAM)
        taskNameBase = self.getTaskName(event, fanIn=True)
        
        # tell writers to use another index
        memcache.incr('index-' + taskNameBase)
        
        lock = '%s-lock-%d' % (taskNameBase, index)
        memcache.decr(lock, 2**15) # tell writers they missed the boat
        
        # 20 iterations * 0.25s = 5s total wait time
        busyWaitIters = 20
        busyWaitIterSecs = 0.250
        
        # busy wait for writers
        for i in xrange(busyWaitIters):
            counter = memcache.get(lock)
            # counter is None --> ejected from memcache
            # int(counter) <= 2**15 --> writers have all called memcache.decr
            if counter is None or int(counter) <= 2**15:
                break
            time.sleep(busyWaitIterSecs)
            self.logger.debug("Tried to acquire lock '%s' %d times...", lock, i + 1)
        
        # FIXME: is there anything else that can be done? will work packages be lost? maybe queue another task
        #        to sweep up later?
        if i >= (busyWaitIters - 1): # pylint: disable-msg=W0631
            self.logger.error("Gave up waiting for all fan-in work items.")
        
        # at this point we could have two tasks trying to process the same work packages. in the
        # happy path this will not likely happen because the tasks are sent off with different ETAs,
        # however in the unhappy path, it is possible for multiple tasks to be executing (retry on
        # 500 etc.). we solve this with a read lock using memcache.
        #
        # FIXME: would using a transaction on db.delete work if using ancestors? one task would win the
        #        race to delete the work based on a transaction error?
        readlock = '%s-readlock-%d' % (taskNameBase, index)
        haveReadLock = False
        try:
            # put the actual name of the winning task into the lock
            actualTaskName = self.get(constants.TASK_NAME_PARAM)
            added = memcache.add(readlock, actualTaskName, time=30) # FIXME: is 30s appropriate?
            lockValue = memcache.get(readlock)
            
            # and return the FSMContexts list
            class FSMContextList(list):
                """ A list that supports .logger.info(), .logger.warning() etc.for fan-in actions """
                def __init__(self, context, contexts):
                    """ setup a self.logger for fan-in actions """
                    super(FSMContextList, self).__init__(contexts)
                    self.logger = Logger(context)
                    self.instanceName = context.instanceName
            
            # if the lock value is not equal to the added value, it means this task lost the race
            if not added or lockValue != actualTaskName:
                return FSMContextList(self, [])
#                raise FanInReadLockFailureRuntimeError(event, 
#                                                       self.machineName, 
#                                                       self.currentState.name, 
#                                                       self.instanceName)
            
            # flag used in finally block to decide whether or not to log an error message
            haveReadLock = True
                
            # fetch all the work packages in the current group for processing
            workIndex = '%s-%d' % (taskNameBase, knuthHash(index))
            query = _FantasmFanIn.all() \
                                 .filter('workIndex =', workIndex) \
                                 .order('__key__')
                                 
            # iterate over the query to fetch results - this is done in 'small batches'
            fanInResults = list(query)
            
            # construct a list of FSMContexts
            contexts = [self.clone(data=r.context) for r in fanInResults]

            # hold the fanInResult around in case we need to re-put them (on an Exception)
            obj[constants.FAN_IN_RESULTS_PARAM] = fanInResults
            
            # and delete the work packages - bearing in mind appengine limits
            maxDeleteSize = 250 # appengine does not like to delete > 500 models at a time, 250 is a nice safe number
            if len(fanInResults) > maxDeleteSize:
                self.logger.warning("%d contexts in the current batch. Consider decreasing fan-in.", len(fanInResults))
            i = 0
            while fanInResults[i:i+maxDeleteSize]:
                db.delete(fanInResults[i:i+maxDeleteSize])
                i += maxDeleteSize
                
            return FSMContextList(self, contexts)
        
        finally:
            deleted = memcache.delete(readlock)
            
            # FIXME: is there anything else that can be done? 
            if haveReadLock and deleted == memcache.DELETE_NETWORK_FAILURE:
                self.logger.error("Unable to release the fan in read lock.")
                
    def _getTaskRetryLimit(self):
        """ Method that returns the maximum number of retries for this particular dispatch 
        
        @return: the task_retry_limit from the current transition, falling back to the machine-level default
        """
        # get task_retry_limit configuration
        try:
            transition = self.startingState.getTransition(self.startingEvent)
            taskRetryLimit = transition.retryOptions.task_retry_limit
        except UnknownEventError:
            # can't find the transition, use the machine-level default
            taskRetryLimit = self.retryOptions.task_retry_limit
        return taskRetryLimit
            
    def _handleException(self, event, obj):
        """ Method for child classes to override to handle exceptions. 
        
        @param event: a string event 
        @param obj: an object that the FSMContext can operate on  
        """
        retryCount = obj.get(constants.RETRY_COUNT_PARAM, 0)
        taskRetryLimit = self._getTaskRetryLimit()
        
        if taskRetryLimit and retryCount >= taskRetryLimit:
            # need to permanently fail
            self.logger.critical('Max-requeues reached. Machine has terminated in an unknown state. ' +
                             '(Machine %s, State %s, Event %s)',
                             self.machineName, self.startingState.name, event, exc_info=True)
            # re-raise, letting App Engine TaskRetryOptions kill the task
            raise
        else:
            # re-raise the exception
            self.logger.warning('Exception occurred processing event. Task will be retried. ' +
                            '(Machine %s, State %s)',
                            self.machineName, self.startingState.name, exc_info=True)
            
            # re-put fan-in work packages
            if obj.get(constants.FAN_IN_RESULTS_PARAM):
                try:
                    fanInResults = obj[constants.FAN_IN_RESULTS_PARAM]
                    maxPutSize = 250 # put in chunks, rather than the entire list which could be large
                    i = 0
                    while fanInResults[i:i+maxPutSize]:
                        db.put(fanInResults[i:i+maxPutSize])
                        i += maxPutSize
                except Exception:
                    self.logger.critical("Unable to re-put() for workIndex = %s", fanInResults[0].workIndex)
                    raise
                
            # this line really just allows unit tests to work - the request is really dead at this point
            self.currentState = self.startingState
            
            raise
    
    def buildUrl(self, state, event):
        """ Builds the taskqueue url. 
        
        @param state: the State to dispatch to
        @param event: the event to dispatch
        @return: a url that can be used to build a taskqueue.Task instance to .dispatch(event)
        """
        assert state and event
        return self.url + '%s/%s/%s/' % (state.name, 
                                         event, 
                                         state.getTransition(event).target.name)
    
    def buildParams(self, state, event):
        """ Builds the taskqueue params. 
        
        @param state: the State to dispatch to
        @param event: the event to dispatch
        @return: a dict suitable to use in constructing a url (GET) or using as params (POST)
        """
        assert state and event
        params = {constants.STATE_PARAM: state.name, 
                  constants.EVENT_PARAM: event,
                  constants.INSTANCE_NAME_PARAM: self.instanceName}
        for key, value in self.items():
            if key not in constants.NON_CONTEXT_PARAMS:
                if self.contextTypes.get(key) is simplejson.loads:
                    value = simplejson.dumps(value, cls=models.Encoder)
                if isinstance(value, datetime.datetime):
                    value = str(int(time.mktime(value.utctimetuple())))
                if isinstance(value, dict):
                    # FIXME: should we issue a warning that they should update fsm.yaml?
                    value = simplejson.dumps(value, cls=models.Encoder)
                if isinstance(value, list) and len(value) == 1:
                    key = key + '[]' # used to preserve lists of length=1 - see handler.py for inverse
                params[key] = value
        return params

    def getTaskName(self, nextEvent, instanceName=None, fanIn=False):
        """ Returns a task name that is unique for a specific dispatch 
        
        @param nextEvent: the event to dispatch
        @param instanceName: an optional instance name override used when building the name
        @param fanIn: if True, omit the work-index part so fan-in Tasks share a base name
        @return: a task name that can be used to build a taskqueue.Task instance to .dispatch(nextEvent)
        """
        transition = self.currentState.getTransition(nextEvent)
        parts = []
        parts.append(instanceName or self.instanceName)
        
        if self.get(constants.GEN_PARAM):
            for (step, gen) in self[constants.GEN_PARAM].items():
                parts.append('continuation-%s-%s' % (step, gen))
        if self.get(constants.FORK_PARAM):
            parts.append('fork-' + str(self[constants.FORK_PARAM]))
        # post-fan-in we need to store the workIndex in the task name to avoid duplicates, since
        # we popped the generation off during fan-in
        # FIXME: maybe not pop the generation in fan-in?
        # FIXME: maybe store this in the instanceName?
        # FIXME: i wish this was easier to get right :-)
        if (not fanIn) and self.get(constants.INDEX_PARAM):
            parts.append('work-index-' + str(self[constants.INDEX_PARAM]))
        parts.append(self.currentState.name)
        parts.append(nextEvent)
        parts.append(transition.target.name)
        parts.append('step-' + str(self[constants.STEPS_PARAM]))
        return '--'.join(parts)
    
    def clone(self, instanceName=None, data=None):
        """ Returns a copy of the FSMContext.
        
        @param instanceName: the instance name to optionally apply to the clone
        @param data: a dict/mapping of data to optionally apply (.update()) to the clone
        @return: a new FSMContext instance
        """
        context = copy.deepcopy(self)
        if instanceName:
            context.instanceName = instanceName
        if data:
            context.update(data)
        return context
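
# A self-contained sketch, not part of the original source, of the memcache counter
# arithmetic behind the fan-in write lock in Example #7. A plain dict stands in for
# memcache here purely for illustration; the real protocol relies on memcache.incr()
# and memcache.decr() being atomic and shared across instances.

BASE = 2 ** 16     # writers count up from this base
PENALTY = 2 ** 15  # the reader subtracts this to tell late writers they missed the boat

store = {}

def incr(key, initial_value=0, delta=1):
    store[key] = store.get(key, initial_value) + delta
    return store[key]

def decr(key, delta=1):
    store[key] = max(0, store.get(key, 0) - delta)
    return store[key]

lock = 'taskNameBase-lock-42'  # hypothetical lock key

# two writers acquire the write lock: counter ends at BASE + 2
incr(lock, initial_value=BASE)
incr(lock, initial_value=BASE)

# the reader closes the lock; any writer arriving after this sees a value below BASE
decr(lock, PENALTY)
late = incr(lock, initial_value=BASE)
assert late < BASE  # the late writer backs off (the FanInWriteLockFailureRuntimeError path)
decr(lock)          # ...and undoes its increment

# the two real writers release; the counter settles at exactly PENALTY (2**15), which is
# the condition mergeJoinDispatch busy-waits for before reading the work packages
decr(lock)
decr(lock)
assert store[lock] == PENALTY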