Beispiel #1
0
    def mergeJoinDispatch(self, event, obj):
        """ Gathers the work packages queued for a fan-in into a single list of contexts.

        Acquires the read lock for this task's work index, sleeps to give datastore index
        writes time to land, and then loads every _FantasmFanIn entity stored under the
        computed work index. When the retry count in obj is greater than zero the run-once
        semaphore is read first; if it returns a truthy value an empty list is returned so
        the same data is not operated over again.

        @param event: an event that is being merge joined (destination state must be a fan in)
        @param obj: an object indexable by constants.RETRY_COUNT_PARAM
        @return: an FSMContextList (a list subclass, possibly empty) holding one clone of this
                 context per work package found
        """
        # fan-out info must never be present in a fan-in context (same invariant as in
        # _queueDispatchFanIn)
        assert not self.get(constants.GEN_PARAM)
        assert not self.get(constants.FORK_PARAM)

        class FSMContextList(list):
            """ A list that supports .logger.info(), .logger.warning() etc. for fan-in actions """
            def __init__(self, context, contexts):
                """ fill the list and attach a logger plus the owning context's instance name """
                super(FSMContextList, self).__init__(contexts)
                self.logger = Logger(context)
                self.instanceName = context.instanceName

        # the url of the Task/FSMContext carries the work package index
        fanInIndex = self.get(constants.INDEX_PARAM)
        baseName = self.getTaskName(event, fanIn=True)

        # see comment (***) in self._queueDispatchFanIn
        #
        # without a retry limit a failed read-lock acquisition never raises; with one, it
        # raises only while the limit is still greater than the current retry count
        if self._getTaskRetryLimit() is None:
            shouldRaise = False
        else:
            shouldRaise = (self._getTaskRetryLimit() >
                           self.__obj[constants.RETRY_COUNT_PARAM])

        lock = ReadWriteLock(baseName, self)
        lock.acquireReadLock(fanInIndex, raiseOnFail=shouldRaise)

        # see comment (A) in self._queueDispatchFanIn(...)
        time.sleep(constants.DATASTORE_ASYNCRONOUS_INDEX_WRITE_WAIT_TIME)

        # idempotency guard: the run-once entity is written in State.dispatch(...) _after_ all
        # the actions have executed successfully, so finding it on a retry means the work
        # packages were already processed
        workIndex = '%s-%d' % (baseName, knuthHash(fanInIndex))
        isRetry = obj[constants.RETRY_COUNT_PARAM] > 0
        if isRetry:
            guard = RunOnceSemaphore(workIndex, self)
            alreadyRan = guard.readRunOnceSemaphore(
                payload=self.__obj[constants.TASK_NAME_PARAM])
            if alreadyRan:
                self.logger.info(
                    "Fan-in idempotency guard for workIndex '%s', not processing any work items.",
                    workIndex)
                # nothing to hand to the fan-in action this time around
                return FSMContextList(self, [])

        # load every work package of the current group, ordered by key
        workPackages = _FantasmFanIn.all()
        workPackages = workPackages.filter('workIndex =', workIndex)
        workPackages = workPackages.order('__key__')

        # one clone of this context per stored work package
        clones = []
        for workPackage in workPackages:
            clones.append(self.clone(data=workPackage.context))
        return FSMContextList(self, clones)
Beispiel #2
0
    def mergeJoinDispatch(self, event, obj):
        """ Gathers the work packages queued for a fan-in into a single list of contexts.

        Acquires the read lock for this task's work index, sleeps to give datastore index
        writes time to land, and then loads every _FantasmFanIn entity stored under the
        computed work index. When the retry count in obj is greater than zero the run-once
        semaphore is read first; if it returns a truthy value an empty list flagged as
        guarded is returned so the same data is not operated over again.

        @param event: an event that is being merge joined (destination state must be a fan in)
        @param obj: an object indexable by constants.RETRY_COUNT_PARAM
        @return: an FSMContextList (a list subclass, possibly empty) holding one clone of this
                 context per work package found; its .guarded attribute is True only when the
                 idempotency guard short-circuited the fetch
        """
        # fan-out info must never be present in a fan-in context (same invariant as in
        # _queueDispatchFanIn)
        assert not self.get(constants.GEN_PARAM)
        assert not self.get(constants.FORK_PARAM)

        class FSMContextList(list):
            """ A list that supports .logger.info(), .logger.warning() etc. for fan-in actions """
            def __init__(self, context, contexts, guarded=False):
                """ fill the list, attach a logger and instance name, remember the guard flag """
                super(FSMContextList, self).__init__(contexts)
                self.logger = Logger(context)
                self.instanceName = context.instanceName
                self.guarded = guarded

        # the url of the Task/FSMContext carries the work package index
        fanInIndex = self.get(constants.INDEX_PARAM)
        self.logger.debug('Index: %s', fanInIndex)
        baseName = self.getTaskName(event, fanIn=True)

        # see comment (***) in self._queueDispatchFanIn
        #
        # without a retry limit a failed read-lock acquisition never raises; with one, it
        # raises only while the limit is still greater than the current retry count
        if self._getTaskRetryLimit() is None:
            shouldRaise = False
        else:
            shouldRaise = (self._getTaskRetryLimit() > self.__obj[constants.RETRY_COUNT_PARAM])

        lock = ReadWriteLock(baseName, self)
        lock.acquireReadLock(fanInIndex, raiseOnFail=shouldRaise)

        # see comment (A) in self._queueDispatchFanIn(...)
        time.sleep(constants.DATASTORE_ASYNCRONOUS_INDEX_WRITE_WAIT_TIME)

        # idempotency guard: the run-once entity is written in State.dispatch(...) _after_ all
        # the actions have executed successfully, so finding it on a retry means the work
        # packages were already processed
        hashedIndex = knuthHash(fanInIndex)
        self.logger.debug('knuthHash of index: %s', hashedIndex)
        workIndex = '%s-%d' % (baseName, hashedIndex)
        isRetry = obj[constants.RETRY_COUNT_PARAM] > 0
        if isRetry:
            guard = RunOnceSemaphore(workIndex, self)
            alreadyRan = guard.readRunOnceSemaphore(payload=self.__obj[constants.TASK_NAME_PARAM])
            if alreadyRan:
                self.logger.info("Fan-in idempotency guard for workIndex '%s', not processing any work items.",
                                 workIndex)
                # flag the empty result so callers can tell "guarded" from "nothing found"
                return FSMContextList(self, [], guarded=True)

        # load every work package of the current group, ordered by key
        workPackages = _FantasmFanIn.all(namespace='')
        workPackages = workPackages.filter('workIndex =', workIndex)
        workPackages = workPackages.order('__key__')

        # one clone of this context per stored work package
        clones = []
        for workPackage in workPackages:
            clones.append(self.clone(replaceData=workPackage.context))
        return FSMContextList(self, clones)
Beispiel #3
0
 def _queueDispatchFanIn(self, nextEvent, fanInPeriod=0, retryOptions=None, queueName=None):
     """ Writes this context to the datastore as a fan-in work package and queues the Task that
     will later call .dispatch(nextEvent) to process the stored work packages.
     
     @param nextEvent: a string event 
     @param fanInPeriod: number of seconds added to the current time to form the fan-in Task's eta
     @param retryOptions: passed through as the Task's retry_options
     @param queueName: the queue name to Queue into (asserted truthy, despite the None default)
     @return: the taskqueue.Task instance that was added to the queue, or None when adding it
              raised TaskAlreadyExistsError or TombstonedTaskError
     """
     assert nextEvent is not None
     assert not self.get(constants.INDEX_PARAM) # fan-in after fan-in is not allowed
     assert queueName
     
     # we pop this off here because we do not want the fan-out/continuation param as part of the
     # task name, otherwise we lose the fan-in - each fan-in gets one work unit.
     self.pop(constants.GEN_PARAM, None)
     fork = self.pop(constants.FORK_PARAM, None)
     
     # transfer the fan-in-group into the context (under a fixed value key) so that states beyond 
     # the fan-in get unique Task names
     # FIXME: this will likely change once we formalize what to do post fan-in
     transition = self.currentState.getTransition(nextEvent)
     if self.get(transition.target.fanInGroup) is not None:
         self[constants.FAN_IN_GROUP_PARAM] = self[transition.target.fanInGroup]
     
     taskNameBase = self.getTaskName(nextEvent, fanIn=True)
     rwlock = ReadWriteLock(taskNameBase, self)
     index = rwlock.currentIndex()
         
     # (***)
     #
     # grab the lock - memcache.incr()
     # 
     # on Task retry, multiple incr() calls are possible. possible ways to handle:
     #
     # 1. release the lock in a 'finally' clause, but then risk missing a work
     #    package because acquiring the read lock will succeed even though the
     #    work package was not written yet.
     #
     # 2. allow the lock to get too high. the fan-in logic attempts to wait for 
     #    work packages across multiple-retry attempts, so this seems like the 
     #    best option. we basically trade a bit of latency in fan-in for reliability.
     #    
     rwlock.acquireWriteLock(index, nextEvent=nextEvent)
     
     # insert the work package, which is simply a serialized FSMContext
     workIndex = '%s-%d' % (taskNameBase, knuthHash(index))
     
     # on retry, we want to ensure we get the same work index for this task
     actualTaskName = self.__obj[constants.TASK_NAME_PARAM]
     # NOTE(review): '+' binds tighter than 'or', so the left operand always starts with
     # 'workIndex-' and is never empty - the trailing 'or None' can never take effect here
     indexKeyName = 'workIndex-' + '-'.join([str(i) for i in [actualTaskName, fork] if i]) or None
     semaphore = RunOnceSemaphore(indexKeyName, self)
     
     # check if the workIndex changed during retry
     semaphoreWritten = False
     if self.__obj[constants.RETRY_COUNT_PARAM] > 0:
         # see comment (A) further down in this method
         time.sleep(constants.DATASTORE_ASYNCRONOUS_INDEX_WRITE_WAIT_TIME)
         payload = semaphore.readRunOnceSemaphore(payload=workIndex, transactional=False)
         if payload:
             # a truthy payload is used as the work index from here on, replacing the freshly
             # computed one, and suppresses the semaphore write below
             semaphoreWritten = True
             if payload != workIndex:
                 self.logger.info("Work index changed from '%s' to '%s' on retry.", payload, workIndex)
                 workIndex = payload
     
     # update this here so it gets written down into the work package too
     self[constants.INDEX_PARAM] = index
             
     # write down two models, one actual work package, one idempotency package
     # (keyName is None when both the task name and the fork value are falsy)
     keyName = '-'.join([str(i) for i in [actualTaskName, fork] if i]) or None
     work = _FantasmFanIn(context=self, workIndex=workIndex, key_name=keyName)
     
     # close enough to idempotent, but could still write only one of the entities
     # FIXME: could be made faster using a bulk put, but this interface is cleaner
     if not semaphoreWritten:
         semaphore.writeRunOnceSemaphore(payload=workIndex, transactional=False)
     
     # put the work item
     db.put(work)
     
     # (A) now the datastore is asynchronously writing the indices, so the work package may
     #     not show up in a query for a period of time. there is a corresponding time.sleep()
     #     in the fan-in of self.mergeJoinDispatch(...) 
         
     # release the lock - memcache.decr()
     rwlock.releaseWriteLock(index)
         
     try:
         
         # insert a task to run in the future and process a bunch of work packages
         # (the Task name is built from the task name base and the index only)
         now = time.time()
         url = self.buildUrl(self.currentState, nextEvent)
         params = self.buildParams(self.currentState, nextEvent)
         task = Task(name='%s-%d' % (taskNameBase, index),
                     method=self.method,
                     url=url,
                     params=params,
                     eta=datetime.datetime.utcfromtimestamp(now) + datetime.timedelta(seconds=fanInPeriod),
                     headers=self.headers,
                     retry_options=retryOptions)
         self.Queue(name=queueName).add(task)
         return task
     
     except (TaskAlreadyExistsError, TombstonedTaskError):
         # Fan-in magic: a Task with this name was already added, so there is nothing more to
         # queue; the method falls through and implicitly returns None
         pass # Fan-in magic
Beispiel #4
0
            except HaltMachineError:
                raise # let it bubble up quietly
            except Exception, e:
                level = context.logger.error
                if e.__class__ in TRANSIENT_ERRORS:
                    level = context.logger.warn
                level('Error processing action for state. (Machine %s, State %s, Action %s)',
                      context.machineName, context.currentState.name, context.currentState.doAction.__class__)
                raise

        if transition.target.isFanIn:

            # this prevents fan-in from re-counting the data if there is an Exception
            # or DeadlineExceeded _after_ doAction.execute(...) succeeds
            index = context.get(constants.INDEX_PARAM) or contextOrContexts[0].get(constants.INDEX_PARAM)
            workIndex = '%s-%d' % (taskNameBase, knuthHash(index))
            semaphore = RunOnceSemaphore(workIndex, context)
            semaphore.writeRunOnceSemaphore(payload=obj[constants.TASK_NAME_PARAM])

            try:
                # at this point we have processed the work items, delete them
                task = Task(name=obj[constants.TASK_NAME_PARAM] + '-cleanup',
                            url=constants.DEFAULT_CLEANUP_URL,
                            params={constants.WORK_INDEX_PARAM: workIndex})
                context.Queue(name=constants.DEFAULT_CLEANUP_QUEUE_NAME).add(task)

            except (TaskAlreadyExistsError, TombstonedTaskError):
                context.logger.info("Fan-in cleanup Task already exists.")

            if context.get('UNITTEST_RAISE_AFTER_FAN_IN'): # only way to generate this failure
                if not contextOrContexts.guarded:
Beispiel #5
0
    def _queueDispatchFanIn(self,
                            nextEvent,
                            fanInPeriod=0,
                            retryOptions=None,
                            queueName=None):
        """ Writes this context to the datastore as a fan-in work package and queues the Task that
        will later call .dispatch(nextEvent) to process the stored work packages.
        
        @param nextEvent: a string event 
        @param fanInPeriod: number of seconds added to the current time to form the fan-in Task's eta
        @param retryOptions: passed through as the Task's retry_options
        @param queueName: the queue name to Queue into (asserted truthy, despite the None default)
        @return: the taskqueue.Task instance that was added to the queue, or None when adding it
                 raised TaskAlreadyExistsError or TombstonedTaskError
        """
        assert nextEvent is not None
        assert not self.get(
            constants.INDEX_PARAM)  # fan-in after fan-in is not allowed
        assert queueName

        # we pop this off here because we do not want the fan-out/continuation param as part of the
        # task name, otherwise we lose the fan-in - each fan-in gets one work unit.
        self.pop(constants.GEN_PARAM, None)
        fork = self.pop(constants.FORK_PARAM, None)

        taskNameBase = self.getTaskName(nextEvent, fanIn=True)
        rwlock = ReadWriteLock(taskNameBase, self)
        index = rwlock.currentIndex()

        # (***)
        #
        # grab the lock - memcache.incr()
        #
        # on Task retry, multiple incr() calls are possible. possible ways to handle:
        #
        # 1. release the lock in a 'finally' clause, but then risk missing a work
        #    package because acquiring the read lock will succeed even though the
        #    work package was not written yet.
        #
        # 2. allow the lock to get too high. the fan-in logic attempts to wait for
        #    work packages across multiple-retry attempts, so this seems like the
        #    best option. we basically trade a bit of latency in fan-in for reliability.
        #
        rwlock.acquireWriteLock(index, nextEvent=nextEvent)

        # insert the work package, which is simply a serialized FSMContext
        workIndex = '%s-%d' % (taskNameBase, knuthHash(index))

        # on retry, we want to ensure we get the same work index for this task
        actualTaskName = self.__obj[constants.TASK_NAME_PARAM]
        # NOTE(review): '+' binds tighter than 'or', so the left operand always starts with
        # 'workIndex-' and is never empty - the trailing 'or None' can never take effect here
        indexKeyName = 'workIndex-' + '-'.join(
            [str(i) for i in [actualTaskName, fork] if i]) or None
        semaphore = RunOnceSemaphore(indexKeyName, self)

        # check if the workIndex changed during retry
        semaphoreWritten = False
        if self.__obj[constants.RETRY_COUNT_PARAM] > 0:
            # see comment (A) further down in this method
            time.sleep(constants.DATASTORE_ASYNCRONOUS_INDEX_WRITE_WAIT_TIME)
            payload = semaphore.readRunOnceSemaphore(payload=workIndex,
                                                     transactional=False)
            if payload:
                # a truthy payload is used as the work index from here on, replacing the
                # freshly computed one, and suppresses the semaphore write below
                semaphoreWritten = True
                if payload != workIndex:
                    self.logger.info(
                        "Work index changed from '%s' to '%s' on retry.",
                        payload, workIndex)
                    workIndex = payload

        # write down two models, one actual work package, one idempotency package
        # (keyName is None when both the task name and the fork value are falsy)
        keyName = '-'.join([str(i)
                            for i in [actualTaskName, fork] if i]) or None
        work = _FantasmFanIn(context=self,
                             workIndex=workIndex,
                             key_name=keyName)

        # close enough to idempotent, but could still write only one of the entities
        # FIXME: could be made faster using a bulk put, but this interface is cleaner
        if not semaphoreWritten:
            semaphore.writeRunOnceSemaphore(payload=workIndex,
                                            transactional=False)

        # put the work item
        db.put(work)

        # (A) now the datastore is asynchronously writing the indices, so the work package may
        #     not show up in a query for a period of time. there is a corresponding time.sleep()
        #     in the fan-in of self.mergeJoinDispatch(...)

        # release the lock - memcache.decr()
        rwlock.releaseWriteLock(index)

        try:

            # insert a task to run in the future and process a bunch of work packages
            now = time.time()
            # the index is stored on the context only here, after the work package was put
            # and just before the url and params of the Task are built
            self[constants.INDEX_PARAM] = index
            url = self.buildUrl(self.currentState, nextEvent)
            params = self.buildParams(self.currentState, nextEvent)
            task = Task(name='%s-%d' % (taskNameBase, index),
                        method=self.method,
                        url=url,
                        params=params,
                        eta=datetime.datetime.utcfromtimestamp(now) +
                        datetime.timedelta(seconds=fanInPeriod),
                        headers=self.headers,
                        retry_options=retryOptions)
            self.Queue(name=queueName).add(task)
            return task

        except (TaskAlreadyExistsError, TombstonedTaskError):
            # Fan-in magic: a Task with this name was already added, so there is nothing more
            # to queue; the method falls through and implicitly returns None
            pass  # Fan-in magic
Beispiel #6
0
    def dispatch(self, context, event, obj):
        """ Fires the transition for an event and runs the surrounding state actions.

        In order: the exit action of context.currentState, the merge join (only when the
        transition target is a fan-in), the transition itself, and then the entry action,
        continuation and do action of context.currentState as it stands once the transition
        has executed. Exceptions raised by any action are logged and re-raised.

        @param context: an FSMContext instance
        @param event: a string event to dispatch to the State
        @param obj: an object that the Transition can operate on
        @return: the event returned from the state's do action, or None when there is no do
                 action or obj has constants.TERMINATED_PARAM set
        @raise InvalidEventNameRuntimeError: if the do action returns a truthy value that is
                 not a str matching constants.NAME_RE
        """
        transition = self.getTransition(event)

        if context.currentState.exitAction:
            try:
                context.currentAction = context.currentState.exitAction
                context.currentState.exitAction.execute(context, obj)
            except Exception:
                # this is the exit action failing, so the message must say so (it used to
                # claim "entry action", which sent log readers to the wrong handler)
                context.logger.error(
                    'Error processing exit action for state. (Machine %s, State %s, exitAction %s)',
                    context.machineName, context.currentState.name,
                    context.currentState.exitAction.__class__)
                raise

        # join the contexts of a fan-in
        contextOrContexts = context
        if transition.target.isFanIn:
            # taskNameBase is reused further down to rebuild the work index
            taskNameBase = context.getTaskName(event, fanIn=True)
            contextOrContexts = context.mergeJoinDispatch(event, obj)
            if not contextOrContexts:
                context.logger.info(
                    'Fan-in resulted in 0 contexts. Terminating machine. (Machine %s, State %s)',
                    context.machineName, context.currentState.name)
                # flag the machine as terminated; the entry action and continuation below
                # still run before the flag is checked
                obj[constants.TERMINATED_PARAM] = True

        transition.execute(context, obj)

        if context.currentState.entryAction:
            try:
                context.currentAction = context.currentState.entryAction
                context.currentState.entryAction.execute(
                    contextOrContexts, obj)
            except Exception:
                context.logger.error(
                    'Error processing entry action for state. (Machine %s, State %s, entryAction %s)',
                    context.machineName, context.currentState.name,
                    context.currentState.entryAction.__class__)
                raise

        if context.currentState.isContinuation:
            try:
                token = context.get(constants.CONTINUATION_PARAM, None)
                nextToken = context.currentState.doAction.continuation(
                    contextOrContexts, obj, token=token)
                if nextToken:
                    context.continuation(nextToken)
                # pop this off because it is really long
                context.pop(constants.CONTINUATION_PARAM, None)

            except Exception:
                context.logger.error(
                    'Error processing continuation for state. (Machine %s, State %s, continuation %s)',
                    context.machineName, context.currentState.name,
                    context.currentState.doAction.__class__)
                raise

        # either a fan-in resulted in no contexts, or a continuation was completed
        if obj.get(constants.TERMINATED_PARAM):
            return None

        nextEvent = None
        if context.currentState.doAction:
            try:
                context.currentAction = context.currentState.doAction
                nextEvent = context.currentState.doAction.execute(
                    contextOrContexts, obj)
            except Exception:
                context.logger.error(
                    'Error processing action for state. (Machine %s, State %s, Action %s)',
                    context.machineName, context.currentState.name,
                    context.currentState.doAction.__class__)
                raise

        if transition.target.isFanIn:

            # this prevents fan-in from re-counting the data if there is an Exception
            # or DeadlineExceeded _after_ doAction.execute(...) succeeds
            index = context.get(constants.INDEX_PARAM)
            workIndex = '%s-%d' % (taskNameBase, knuthHash(index))
            semaphore = RunOnceSemaphore(workIndex, context)
            semaphore.writeRunOnceSemaphore(
                payload=obj[constants.TASK_NAME_PARAM])

            try:
                # at this point we have processed the work items, delete them
                task = Task(name=obj[constants.TASK_NAME_PARAM] + '-cleanup',
                            url=constants.DEFAULT_CLEANUP_URL,
                            params={constants.WORK_INDEX_PARAM: workIndex})
                context.Queue(
                    name=constants.DEFAULT_CLEANUP_QUEUE_NAME).add(task)

            except (TaskAlreadyExistsError, TombstonedTaskError):
                context.logger.info("Fan-in cleanup Task already exists.")

            # test hook: the only way to generate a failure after the fan-in completed
            if context.get('UNITTEST_RAISE_AFTER_FAN_IN'):
                raise Exception()

        if nextEvent:
            # only plain str event names matching NAME_RE are accepted
            if not isinstance(nextEvent,
                              str) or not constants.NAME_RE.match(nextEvent):
                raise InvalidEventNameRuntimeError(nextEvent,
                                                   context.machineName,
                                                   context.currentState.name,
                                                   context.instanceName)

        return nextEvent
Beispiel #7
0
 def dispatch(self, context, event, obj):
     """ Fires the transition for an event and runs the surrounding state actions.

     In order: the exit action of context.currentState, the merge join (only when the
     transition target is a fan-in), the transition itself, and then the entry action,
     continuation and do action of context.currentState as it stands once the transition
     has executed. Exceptions raised by any action are logged and re-raised.

     @param context: an FSMContext instance
     @param event: a string event to dispatch to the State
     @param obj: an object that the Transition can operate on
     @return: the event returned from the state's do action, or None when there is no do
              action or obj has constants.TERMINATED_PARAM set
     @raise FanInNoContextsAvailableRuntimeError: if a fan-in merge join returns an empty,
              non-guarded list of contexts
     @raise InvalidEventNameRuntimeError: if the do action returns a truthy value that is
              not a str matching constants.NAME_RE
     """
     transition = self.getTransition(event)

     if context.currentState.exitAction:
         try:
             context.currentAction = context.currentState.exitAction
             context.currentState.exitAction.execute(context, obj)
         except Exception:
             # this is the exit action failing, so the message must say so (it used to
             # claim "entry action", which sent log readers to the wrong handler)
             context.logger.error('Error processing exit action for state. (Machine %s, State %s, exitAction %s)',
                           context.machineName,
                           context.currentState.name,
                           context.currentState.exitAction.__class__)
             raise

     # join the contexts of a fan-in
     contextOrContexts = context
     if transition.target.isFanIn:
         # taskNameBase is reused further down to rebuild the work index
         taskNameBase = context.getTaskName(event, fanIn=True)
         contextOrContexts = context.mergeJoinDispatch(event, obj)
         if not contextOrContexts and not contextOrContexts.guarded:
             # by implementation, EVERY fan-in should have at least one work package available to it, this
             # is likely caused by an index writing delay, and it is suitable to simply retry this task
             raise FanInNoContextsAvailableRuntimeError(event,
                                                        context.machineName,
                                                        context.currentState.name,
                                                        context.instanceName)

     transition.execute(context, obj)

     if context.currentState.entryAction:
         try:
             context.currentAction = context.currentState.entryAction
             context.currentState.entryAction.execute(contextOrContexts, obj)
         except Exception:
             context.logger.error('Error processing entry action for state. (Machine %s, State %s, entryAction %s)',
                           context.machineName,
                           context.currentState.name,
                           context.currentState.entryAction.__class__)
             raise

     if context.currentState.isContinuation:
         try:
             token = context.get(constants.CONTINUATION_PARAM, None)
             nextToken = context.currentState.doAction.continuation(contextOrContexts, obj, token=token)
             if nextToken:
                 context.continuation(nextToken)
             context.pop(constants.CONTINUATION_PARAM, None) # pop this off because it is really long

         except Exception:
             context.logger.error('Error processing continuation for state. (Machine %s, State %s, continuation %s)',
                           context.machineName,
                           context.currentState.name,
                           context.currentState.doAction.__class__)
             raise

     # nothing in this method sets TERMINATED_PARAM (an empty fan-in raises above instead);
     # presumably it is set by the transition, an action or a completed continuation - TODO confirm
     if obj.get(constants.TERMINATED_PARAM):
         return None

     nextEvent = None
     if context.currentState.doAction:
         try:
             context.currentAction = context.currentState.doAction
             nextEvent = context.currentState.doAction.execute(contextOrContexts, obj)
         except Exception:
             context.logger.error('Error processing action for state. (Machine %s, State %s, Action %s)',
                           context.machineName,
                           context.currentState.name,
                           context.currentState.doAction.__class__)
             raise

     if transition.target.isFanIn:

         # this prevents fan-in from re-counting the data if there is an Exception
         # or DeadlineExceeded _after_ doAction.execute(...) succeeds
         index = context.get(constants.INDEX_PARAM)
         workIndex = '%s-%d' % (taskNameBase, knuthHash(index))
         semaphore = RunOnceSemaphore(workIndex, context)
         semaphore.writeRunOnceSemaphore(payload=obj[constants.TASK_NAME_PARAM])

         try:
             # at this point we have processed the work items, delete them
             task = Task(name=obj[constants.TASK_NAME_PARAM] + '-cleanup',
                         url=constants.DEFAULT_CLEANUP_URL,
                         params={constants.WORK_INDEX_PARAM: workIndex})
             context.Queue(name=constants.DEFAULT_CLEANUP_QUEUE_NAME).add(task)

         except (TaskAlreadyExistsError, TombstonedTaskError):
             context.logger.info("Fan-in cleanup Task already exists.")

         # test hook: the only way to generate a failure after the fan-in completed; skipped
         # when the contexts came back guarded
         if context.get('UNITTEST_RAISE_AFTER_FAN_IN'):
             if not contextOrContexts.guarded:
                 raise Exception()

     if nextEvent:
         # only plain str event names matching NAME_RE are accepted
         if not isinstance(nextEvent, str) or not constants.NAME_RE.match(nextEvent):
             raise InvalidEventNameRuntimeError(nextEvent, context.machineName, context.currentState.name,
                                                context.instanceName)

     return nextEvent
Beispiel #8
0
                if e.__class__ in TRANSIENT_ERRORS:
                    level = context.logger.warn
                level(
                    'Error processing action for state. (Machine %s, State %s, Action %s)',
                    context.machineName, context.currentState.name,
                    context.currentState.doAction.__class__)
                raise

        if transition.target.isFanIn:

            # this prevents fan-in from re-counting the data if there is an Exception
            # or DeadlineExceeded _after_ doAction.execute(...) succeeds
            index = context.get(
                constants.INDEX_PARAM) or contextOrContexts[0].get(
                    constants.INDEX_PARAM)
            workIndex = '%s-%d' % (taskNameBase, knuthHash(index))
            semaphore = RunOnceSemaphore(workIndex, context)
            semaphore.writeRunOnceSemaphore(
                payload=obj[constants.TASK_NAME_PARAM])

            try:
                # at this point we have processed the work items, delete them
                task = Task(name=obj[constants.TASK_NAME_PARAM] + '-cleanup',
                            url=constants.DEFAULT_CLEANUP_URL,
                            params={constants.WORK_INDEX_PARAM: workIndex})
                context.Queue(
                    name=constants.DEFAULT_CLEANUP_QUEUE_NAME).add(task)

            except (TaskAlreadyExistsError, TombstonedTaskError):
                context.logger.info("Fan-in cleanup Task already exists.")
Beispiel #9
0
    def mergeJoinDispatch(self, event, obj):
        """ Performs a merge join on the pending fan-in dispatches.

        The steps, in order:
          1. increment the memcache index so that new writers start using another index
          2. flag the write lock for this index and busy wait until in-flight writers have released it
          3. take a memcache read lock so that only one task processes this group of work packages
          4. fetch the work packages, keep them on obj, and delete them from the datastore

        @param event: an event that is being merge joined (destination state must be a fan in)
        @param obj: an object supporting item assignment; the fetched work packages are stored under
                    constants.FAN_IN_RESULTS_PARAM so they can be re-put on an Exception
        @return: a list (possibly empty) of FSMContext instances; the list is empty when this task
                 did not win the read lock
        """
        # this assertion comes from _queueDispatchFanIn - we never want fan-out info in a fan-in context
        assert not self.get(constants.GEN_PARAM)
        assert not self.get(constants.FORK_PARAM)

        # the work package index is stored in the url of the Task/FSMContext
        index = self.get(constants.INDEX_PARAM)
        taskNameBase = self.getTaskName(event, fanIn=True)

        # tell writers to use another index
        memcache.incr('index-' + taskNameBase)

        lock = '%s-lock-%d' % (taskNameBase, index)
        memcache.decr(lock, 2**15) # tell writers they missed the boat

        # 20 iterations * 0.25s = 5s total wait time
        busyWaitIters = 20
        busyWaitIterSecs = 0.250

        # busy wait for writers
        for i in xrange(busyWaitIters):
            counter = memcache.get(lock)
            # counter is None --> ejected from memcache
            # int(counter) <= 2**15 --> writers have all called memcache.decr
            if counter is None or int(counter) <= 2**15:
                break
            time.sleep(busyWaitIterSecs)
            self.logger.debug("Tried to acquire lock '%s' %d times...", lock, i + 1)
        else:
            # the else clause only runs when the loop was exhausted without a break, so succeeding
            # on the very last poll is not reported as a failure.
            #
            # FIXME: is there anything else that can be done? will work packages be lost? maybe queue
            #        another task to sweep up later?
            self.logger.error("Gave up waiting for all fan-in work items.")

        # the list type handed back to fan-in actions
        class FSMContextList(list):
            """ A list that supports .logger.info(), .logger.warning() etc.for fan-in actions """
            def __init__(self, context, contexts):
                """ setup a self.logger for fan-in actions """
                super(FSMContextList, self).__init__(contexts)
                self.logger = Logger(context)
                self.instanceName = context.instanceName

        # at this point we could have two tasks trying to process the same work packages. in the
        # happy path this will not likely happen because the tasks are sent off with different ETAs,
        # however in the unhappy path, it is possible for multiple tasks to be executing (retry on
        # 500 etc.). we solve this with a read lock using memcache.
        #
        # FIXME: would using a transaction on db.delete work if using ancestors? one task would win the
        #        race to delete the work based on a transaction error?
        readlock = '%s-readlock-%d' % (taskNameBase, index)
        haveReadLock = False
        try:
            # put the actual name of the winning task into the lock
            actualTaskName = self.get(constants.TASK_NAME_PARAM)
            added = memcache.add(readlock, actualTaskName, time=30) # FIXME: is 30s appropriate?
            lockValue = memcache.get(readlock)

            # if the lock value is not equal to the added value, it means this task lost the race
            if not added or lockValue != actualTaskName:
                return FSMContextList(self, [])

            # flag used in the finally block to decide whether the read lock is ours to release
            haveReadLock = True

            # fetch all the work packages in the current group for processing
            workIndex = '%s-%d' % (taskNameBase, knuthHash(index))
            query = _FantasmFanIn.all() \
                                 .filter('workIndex =', workIndex) \
                                 .order('__key__')

            # iterate over the query to fetch results - this is done in 'small batches'
            fanInResults = list(query)

            # construct a list of FSMContexts
            contexts = [self.clone(data=r.context) for r in fanInResults]

            # hold the fanInResult around in case we need to re-put them (on an Exception)
            obj[constants.FAN_IN_RESULTS_PARAM] = fanInResults

            # and delete the work packages - bearing in mind appengine limits
            maxDeleteSize = 250 # appengine does not like to delete > 500 models at a time, 250 is a nice safe number
            if len(fanInResults) > maxDeleteSize:
                self.logger.warning("%d contexts in the current batch. Consider decreasing fan-in.", len(fanInResults))
            for start in xrange(0, len(fanInResults), maxDeleteSize):
                db.delete(fanInResults[start:start + maxDeleteSize])

            return FSMContextList(self, contexts)

        finally:
            # only release the read lock when this task actually holds it; a task that lost the
            # race must not delete the winner's lock, otherwise yet another task could acquire it
            # and process the same work packages a second time.
            if haveReadLock:
                deleted = memcache.delete(readlock)

                # FIXME: is there anything else that can be done?
                if deleted == memcache.DELETE_NETWORK_FAILURE:
                    self.logger.error("Unable to release the fan in read lock.")
Beispiel #10
0
 def _queueDispatchFanIn(self, nextEvent, fanInPeriod=0, queueName=None):
     """ Queues a call to .dispatch(nextEvent) in the task queue, or saves the context to the 
     datastore for processing by the queued .dispatch(nextEvent)
     
     The context is always written to the datastore as a work package. A Task named after the
     current index is then added to the queue; if a Task with that name already exists, the
     existing Task is relied on to process the work package.
     
     @param nextEvent: a string event 
     @param fanInPeriod: the period of time between fan in Tasks 
     @param queueName: the queue name to Queue into 
     @return: the taskqueue.Task instance that was queued, or None when a Task with the same name
              already exists or is tombstoned
     @raise FanInWriteLockFailureRuntimeError: when the write lock counter shows that the reader
              has already started on this index
     """
     assert nextEvent is not None
     assert not self.get(constants.INDEX_PARAM) # fan-in after fan-in is not allowed
     assert queueName
     
     # we pop this off here because we do not want the fan-out/continuation param as part of the
     # task name, otherwise we lose the fan-in - each fan-in gets one work unit.
     self.pop(constants.GEN_PARAM, None)
     self.pop(constants.FORK_PARAM, None)
     
     taskNameBase = self.getTaskName(nextEvent, fanIn=True)
     indexKey = 'index-' + taskNameBase
     index = memcache.get(indexKey)
     if index is None:
         # using 'random.randint' here instead of '1' helps when the index is ejected from memcache
         # instead of restarting at the same counter, we jump (likely) far away from existing task job
         # names. 
         memcache.add(indexKey, random.randint(1, 2**32))
         index = memcache.get(indexKey)
         
     # grab the lock
     lock = '%s-lock-%d' % (taskNameBase, index)
     writers = memcache.incr(lock, initial_value=2**16)
     if writers < 2**16:
         memcache.decr(lock)
         # this will escape as a 500 error and the Task will be re-tried by appengine
         raise FanInWriteLockFailureRuntimeError(nextEvent, 
                                                 self.machineName, 
                                                 self.currentState.name, 
                                                 self.instanceName)
     
     # everything from here on runs with the write lock held, so it all lives inside the
     # try/finally - a failure while writing the work package must still release the lock,
     # otherwise the fan-in reader busy waits for a writer that will never finish.
     try:
         # insert the work package, which is simply a serialized FSMContext
         workIndex = '%s-%d' % (taskNameBase, knuthHash(index))
         work = _FantasmFanIn(context=self, workIndex=workIndex)
         work.put()
         
         # insert a task to run in the future and process a bunch of work packages
         now = time.time()
         self[constants.INDEX_PARAM] = index
         url = self.buildUrl(self.currentState, nextEvent)
         params = self.buildParams(self.currentState, nextEvent)
         # int(now / (fanInPeriod - 1 + 30)) included because it was in [2], but is less needed now that
         # we use random.randint in seeding memcache. for long fan in periods, and the case where random.randint
         # hits the same value twice, this may cause problems for up to fanInPeriod + 30s.
         # see: http://www.mail-archive.com/[email protected]/msg30408.html
         task = Task(name='%s-%d-%d' % (taskNameBase, int(now / (fanInPeriod - 1 + 30)), index),
                     method=self.method,
                     url=url,
                     params=params,
                     eta=datetime.datetime.utcfromtimestamp(now) + datetime.timedelta(seconds=fanInPeriod),
                     headers=self.headers)
         self.Queue(name=queueName).add(task)
         return task
     except (TaskAlreadyExistsError, TombstonedTaskError):
         # Fan-in magic: a Task with this name was already queued for the index, so the work
         # package written above is left for that Task to process.
         return None
     finally:
         # release the write lock
         memcache.decr(lock)