Example #1
    def mergeJoinDispatch(self, event, obj):
        """ Performs a merge join on the pending fan-in dispatches.
        
        @param event: an event that is being merge joined (destination state must be a fan in) 
        @return: a list (possibly empty) of FSMContext instances
        """
        # this assertion comes from _queueDispatchFanIn - we never want fan-out info in a fan-in context
        assert not self.get(constants.GEN_PARAM)
        assert not self.get(constants.FORK_PARAM)

        # the work package index is stored in the url of the Task/FSMContext
        index = self.get(constants.INDEX_PARAM)
        taskNameBase = self.getTaskName(event, fanIn=True)

        # see comment (***) in self._queueDispatchFanIn
        #
        # in the case of failing to acquire a read lock (due to failed release of write lock)
        # we have decided to keep retrying
        raiseOnFail = False
        if self._getTaskRetryLimit() is not None:
            raiseOnFail = (self._getTaskRetryLimit() >
                           self.__obj[constants.RETRY_COUNT_PARAM])

        rwlock = ReadWriteLock(taskNameBase, self)
        rwlock.acquireReadLock(index, raiseOnFail=raiseOnFail)

        # and return the FSMContexts list
        class FSMContextList(list):
            """ A list that supports .logger.info(), .logger.warning() etc.for fan-in actions """
            def __init__(self, context, contexts):
                """ setup a self.logger for fan-in actions """
                super(FSMContextList, self).__init__(contexts)
                self.logger = Logger(context)
                self.instanceName = context.instanceName

        # see comment (A) in self._queueDispatchFanIn(...)
        time.sleep(constants.DATASTORE_ASYNCRONOUS_INDEX_WRITE_WAIT_TIME)

        # the following step ensures that fan-in only ever operates once over a list of data
        # the entity is created in State.dispatch(...) _after_ all the actions have executed
        # successfully
        workIndex = '%s-%d' % (taskNameBase, knuthHash(index))
        if obj[constants.RETRY_COUNT_PARAM] > 0:
            semaphore = RunOnceSemaphore(workIndex, self)
            if semaphore.readRunOnceSemaphore(
                    payload=self.__obj[constants.TASK_NAME_PARAM]):
                self.logger.info(
                    "Fan-in idempotency guard for workIndex '%s', not processing any work items.",
                    workIndex)
                return FSMContextList(self,
                                      [])  # don't operate over the data again

        # fetch all the work packages in the current group for processing
        query = _FantasmFanIn.all() \
                             .filter('workIndex =', workIndex) \
                             .order('__key__')

        # construct a list of FSMContexts
        contexts = [self.clone(data=r.context) for r in query]
        return FSMContextList(self, contexts)
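
The shared work index above is what lets every writer and the eventual merge task agree on one group of work packages. knuthHash itself is not shown in this listing; a common formulation of Knuth's multiplicative hash, which it presumably implements, looks like the sketch below (the multiplier is an assumption, not taken from this source).

    # Hypothetical sketch of knuthHash (not shown in this listing): Knuth's
    # multiplicative hash over 32-bit integers. The multiplier 2654435761 is
    # floor(2**32 / golden ratio) -- assumed, not verified against fantasm.
    def knuth_hash(number):
        return (number * 2654435761) % 2 ** 32

    # how the examples derive the shared work index from the memcache index
    task_name_base = 'foo'         # stand-in for getTaskName(event, fanIn=True)
    index = 3626764237             # the index value seen in the tests below
    work_index = '%s-%d' % (task_name_base, knuth_hash(index))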
Example #2
    def mergeJoinDispatch(self, event, obj):
        """ Performs a merge join on the pending fan-in dispatches.

        @param event: an event that is being merge joined (destination state must be a fan in)
        @return: a list (possibly empty) of FSMContext instances
        """
        # this assertion comes from _queueDispatchFanIn - we never want fan-out info in a fan-in context
        assert not self.get(constants.GEN_PARAM)
        assert not self.get(constants.FORK_PARAM)

        # the work package index is stored in the url of the Task/FSMContext
        index = self.get(constants.INDEX_PARAM)
        self.logger.debug('Index: %s', index)
        taskNameBase = self.getTaskName(event, fanIn=True)

        # see comment (***) in self._queueDispatchFanIn
        #
        # in the case of failing to acquire a read lock (due to failed release of write lock)
        # we have decided to keep retrying
        raiseOnFail = False
        if self._getTaskRetryLimit() is not None:
            raiseOnFail = (self._getTaskRetryLimit() > self.__obj[constants.RETRY_COUNT_PARAM])

        rwlock = ReadWriteLock(taskNameBase, self)
        rwlock.acquireReadLock(index, raiseOnFail=raiseOnFail)

        # and return the FSMContexts list
        class FSMContextList(list):
            """ A list that supports .logger.info(), .logger.warning() etc.for fan-in actions """
            def __init__(self, context, contexts, guarded=False):
                """ setup a self.logger for fan-in actions """
                super(FSMContextList, self).__init__(contexts)
                self.logger = Logger(context)
                self.instanceName = context.instanceName
                self.guarded = guarded

        # see comment (A) in self._queueDispatchFanIn(...)
        time.sleep(constants.DATASTORE_ASYNCRONOUS_INDEX_WRITE_WAIT_TIME)

        # the following step ensures that fan-in only ever operates once over a list of data
        # the entity is created in State.dispatch(...) _after_ all the actions have executed
        # successfully
        khash = knuthHash(index)
        self.logger.debug('knuthHash of index: %s', khash)
        workIndex = '%s-%d' % (taskNameBase, khash)
        if obj[constants.RETRY_COUNT_PARAM] > 0:
            semaphore = RunOnceSemaphore(workIndex, self)
            if semaphore.readRunOnceSemaphore(payload=self.__obj[constants.TASK_NAME_PARAM]):
                self.logger.info("Fan-in idempotency guard for workIndex '%s', not processing any work items.",
                                 workIndex)
                return FSMContextList(self, [], guarded=True) # don't operate over the data again

        # fetch all the work packages in the current group for processing
        query = _FantasmFanIn.all(namespace='') \
                             .filter('workIndex =', workIndex) \
                             .order('__key__')

        # construct a list of FSMContexts
        contexts = [self.clone(replaceData=r.context) for r in query]
        return FSMContextList(self, contexts)
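
On a retried task, the run-once semaphore is what keeps the merge join idempotent: if the semaphore already holds this task's name, a previous attempt dispatched the work packages successfully and they must not be processed again. A distilled sketch of that guard (the helper name is illustrative, not fantasm API):

    # Illustrative helper distilled from the two examples above; not part of
    # fantasm itself.
    def merge_already_ran(retry_count, semaphore, task_name):
        """ True when a retried fan-in task must skip its work packages. """
        if retry_count > 0 and semaphore.readRunOnceSemaphore(payload=task_name):
            return True  # the semaphore is written only after a successful dispatch
        return False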
Example #3
 def test_releaseWriteLock(self):
     lock = ReadWriteLock('foo', self.context)
     index = lock.currentIndex()
     self.assertEqual(None, memcache.get(lock.lockKey(index)))
     lock.acquireWriteLock(index)
     self.assertEqual('65537', memcache.get(lock.lockKey(index)))
     lock.releaseWriteLock(index)
     self.assertEqual('65536', memcache.get(lock.lockKey(index)))
Example #4
 def test_acquireWriteLock_failure(self):
     lock = ReadWriteLock('foo', self.context)
     index = lock.currentIndex()
     self.assertEqual(None, memcache.get(lock.lockKey(index)))
     lock.acquireWriteLock(index)  # need to call before acquireReadLock
     self.assertEqual('65537', memcache.get(lock.lockKey(index)))
     lock.acquireReadLock(index)
     self.assertEqual('32769', memcache.get(lock.lockKey(index)))
     self.assertRaises(FanInWriteLockFailureRuntimeError,
                       lock.acquireWriteLock, index)
Example #5
 def test_releaseWriteLock(self):
     lock = ReadWriteLock('foo', self.context)
     index = lock.currentIndex()
     self.assertEqual(None, memcache.get(lock.lockKey(index)))
     lock.acquireWriteLock(index)
     self.assertEqual(65537, memcache.get(lock.lockKey(index)))
     lock.releaseWriteLock(index)
     self.assertEqual(65536, memcache.get(lock.lockKey(index)))
Example #6
 def test_acquireWriteLock_failure(self):
     lock = ReadWriteLock('foo', self.context)
     index = lock.currentIndex()
     self.assertEqual(None, memcache.get(lock.lockKey(index)))
     lock.acquireWriteLock(index) # need to call before acquireReadLock
     self.assertEqual(65537, memcache.get(lock.lockKey(index)))
     lock.acquireReadLock(index)
     self.assertEqual(32769, memcache.get(lock.lockKey(index)))
     self.assertRaises(FanInWriteLockFailureRuntimeError, lock.acquireWriteLock, index)
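
The magic numbers in these assertions fall out of a single memcache counter. The pattern implied by the tests: acquiring the write lock increments a counter whose initial value is 2**16 (so the first writer sees 65537), releasing it decrements by one, and acquiring the read lock subtracts 2**15 to tell writers they missed the window (65537 - 32768 = 32769). A minimal sketch of that arithmetic, assuming those constants:

    # Minimal sketch of the counter arithmetic, assuming the constants implied
    # by the assertions above: writers incr from an initial value of 2**16,
    # the reader decrs by 2**15.
    WRITE_BASE = 2 ** 16  # assumed memcache.incr(key, initial_value=WRITE_BASE)
    READ_MARK = 2 ** 15   # assumed memcache.decr(key, READ_MARK) in acquireReadLock

    counter = WRITE_BASE + 1   # acquireWriteLock
    assert counter == 65537    # test_releaseWriteLock / test_acquireWriteLock_failure
    counter -= READ_MARK       # acquireReadLock
    assert counter == 32769    # test_acquireWriteLock_failure
    counter -= 1               # releaseWriteLock (by the remaining writer)
    assert counter == 32768    # test_acquireReadLock_one_wait_iter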
Example #7
 def test_acquireReadLock_gave_up(self):
     lock = ReadWriteLock('foo', self.context)
     index = lock.currentIndex()
     self.assertEqual(None, memcache.get(lock.lockKey(index)))
     lock.acquireWriteLock(index)
     self.assertEqual('65537', memcache.get(lock.lockKey(index)))
     lock.acquireReadLock(index)
     self.assertEqual('32769', memcache.get(lock.lockKey(index)))
     self.assertEqual([
         "Tried to acquire read lock 'foo-lock-3626764237' 1 times...",
         "Tried to acquire read lock 'foo-lock-3626764237' 2 times..."
     ], self.loggingDouble.messages['debug'])
     self.assertEqual([
         "Gave up waiting for all fan-in work items with read lock 'foo-lock-3626764237'."
     ], self.loggingDouble.messages['critical'])
Example #8
 def test_acquireReadLock_gave_up(self):
     lock = ReadWriteLock('foo', self.context)
     index = lock.currentIndex()
     self.assertEqual(None, memcache.get(lock.lockKey(index)))
     lock.acquireWriteLock(index)
     self.assertEqual(65537, memcache.get(lock.lockKey(index)))
     lock.acquireReadLock(index)
     self.assertEqual(32769, memcache.get(lock.lockKey(index)))
     self.assertEqual(["Tried to acquire read lock 'foo-lock-3626764237' 1 times...",
                       "Tried to acquire read lock 'foo-lock-3626764237' 2 times..."],
                       self.loggingDouble.messages['debug'])
     self.assertEqual(["Gave up waiting for all fan-in work items with read lock 'foo-lock-3626764237'."],
                      self.loggingDouble.messages['critical'])
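
The log assertions imply that acquireReadLock busy-waits on that counter, emitting one debug line per attempt and a critical line when it exhausts its retries. A hedged sketch of that loop; the iteration count and the 2**15 threshold are assumptions read off the messages and counter values above, not fantasm's actual constants:

    import time

    # Hedged sketch of the busy-wait implied by the logged messages; iters,
    # wait and the 2**15 threshold are assumptions, not fantasm's constants.
    def acquire_read_lock_sketch(get_counter, lock_key, logger, iters=2, wait=0.1):
        for i in range(iters):
            counter = get_counter()
            if counter is None or int(counter) <= 2 ** 15:
                return True  # every writer has released; safe to read
            logger.debug("Tried to acquire read lock '%s' %d times...", lock_key, i + 1)
            time.sleep(wait)
        logger.critical("Gave up waiting for all fan-in work items with read lock '%s'.", lock_key)
        return False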
Example #9
 def test_acquireReadLock_before_acquireWriteLock(self):
     lock = ReadWriteLock('foo', self.context)
     index = lock.currentIndex()
     self.assertEqual(None, memcache.get(lock.lockKey(index)))
     lock.acquireReadLock(index)
     self.assertEqual(None, memcache.get(lock.lockKey(index)))
     self.assertEqual([], self.loggingDouble.messages['debug'])
     self.assertEqual([], self.loggingDouble.messages['critical'])
Example #10
 def test_acquireReadLock_one_wait_iter(self):
     lock = ReadWriteLock('foo', self.context)
     index = lock.currentIndex()
     self.assertEqual(None, memcache.get(lock.lockKey(index)))
     lock.acquireWriteLock(index)
     self.assertEqual(65537, memcache.get(lock.lockKey(index)))
     def sleepAndRelease(seconds): # pylint: disable=W0613
         lock.releaseWriteLock(index)
     mock('time.sleep', returns_func=sleepAndRelease, tracker=None)
     lock.acquireReadLock(index)
     self.assertEqual(32768, memcache.get(lock.lockKey(index)))
     self.assertEqual(["Tried to acquire read lock 'foo-lock-3626764237' 1 times..."],
                      self.loggingDouble.messages['debug'])
     self.assertEqual(["Gave up waiting for all fan-in work items with read lock 'foo-lock-3626764237'."],
                      self.loggingDouble.messages['critical'])
Example #11
    def test_acquireReadLock_one_wait_iter(self):
        lock = ReadWriteLock('foo', self.context)
        index = lock.currentIndex()
        self.assertEqual(None, memcache.get(lock.lockKey(index)))
        lock.acquireWriteLock(index)
        self.assertEqual('65537', memcache.get(lock.lockKey(index)))

        def sleepAndRelease(seconds):  # pylint: disable-msg=W0613
            lock.releaseWriteLock(index)

        mock('time.sleep', returns_func=sleepAndRelease, tracker=None)
        lock.acquireReadLock(index)
        self.assertEqual('32768', memcache.get(lock.lockKey(index)))
        self.assertEqual(
            ["Tried to acquire read lock 'foo-lock-3626764237' 1 times..."],
            self.loggingDouble.messages['debug'])
        self.assertEqual([
            "Gave up waiting for all fan-in work items with read lock 'foo-lock-3626764237'."
        ], self.loggingDouble.messages['critical'])
Example #12
 def test_indexKey(self):
     lock = ReadWriteLock('foo', self.context)
     self.assertEqual('index-foo', lock.indexKey())
Example #13
 def test_lockKey(self):
     lock = ReadWriteLock('foo', self.context)
     self.assertEqual('foo-lock-999', lock.lockKey(999))
Example #14
 def test_currentIndex_index_expired(self):
     lock = ReadWriteLock('foo', self.context)
     self.assertEqual(3626764237, lock.currentIndex())
     random.seed(1)
     memcache.delete(lock.indexKey())
     self.assertEqual(577090035, lock.currentIndex())
Example #15
 def test_currentIndex_index_changed(self):
     lock = ReadWriteLock('foo', self.context)
     self.assertEqual(3626764237, lock.currentIndex())
     memcache.incr(lock.indexKey())
     self.assertEqual(3626764238, lock.currentIndex())
Example #16
 def test_currentIndex(self):
     lock = ReadWriteLock('foo', self.context)
     self.assertEqual(3626764237, lock.currentIndex())
     self.assertEqual(3626764237, lock.currentIndex())
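
Taken together, these tests pin down the key scheme and the index lifecycle: indexKey() is 'index-<name>', lockKey(index) is '<name>-lock-<index>', and currentIndex() is memcache-backed, reseeded from a random value when the entry expires (which is why the expiry test calls random.seed(1)). A sketch of those semantics, using a plain dict as a stand-in for memcache; the random range is an assumption:

    import random

    # Sketch of the key scheme and index lifecycle exercised above; the dict
    # stands in for memcache and the random range is an assumption.
    class ReadWriteLockSketch(object):
        def __init__(self, machineName, cache):
            self.machineName = machineName
            self.cache = cache

        def indexKey(self):
            return 'index-' + self.machineName               # 'index-foo'

        def lockKey(self, index):
            return '%s-lock-%d' % (self.machineName, index)  # 'foo-lock-999'

        def currentIndex(self):
            index = self.cache.get(self.indexKey())
            if index is None:                                # expired / evicted
                index = random.randint(1, 2 ** 32)           # assumed range
                self.cache[self.indexKey()] = index
            return int(index)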
Example #17
 def _queueDispatchFanIn(self, nextEvent, fanInPeriod=0, retryOptions=None, queueName=None):
     """ Queues a call to .dispatch(nextEvent) in the task queue, or saves the context to the 
     datastore for processing by the queued .dispatch(nextEvent)
     
     @param nextEvent: a string event 
     @param fanInPeriod: the period of time between fan in Tasks 
     @param queueName: the queue name to Queue into 
     @return: a taskqueue.Task instance which may or may not have been queued already
     """
     assert nextEvent is not None
     assert not self.get(constants.INDEX_PARAM) # fan-in after fan-in is not allowed
     assert queueName
     
     # we pop this off here because we do not want the fan-out/continuation param as part of the
      # task name, otherwise we lose the fan-in - each fan-in gets one work unit.
     self.pop(constants.GEN_PARAM, None)
     fork = self.pop(constants.FORK_PARAM, None)
     
     # transfer the fan-in-group into the context (under a fixed value key) so that states beyond 
     # the fan-in get unique Task names
     # FIXME: this will likely change once we formalize what to do post fan-in
     transition = self.currentState.getTransition(nextEvent)
     if self.get(transition.target.fanInGroup) is not None:
         self[constants.FAN_IN_GROUP_PARAM] = self[transition.target.fanInGroup]
     
     taskNameBase = self.getTaskName(nextEvent, fanIn=True)
     rwlock = ReadWriteLock(taskNameBase, self)
     index = rwlock.currentIndex()
         
     # (***)
     #
     # grab the lock - memcache.incr()
     # 
     # on Task retry, multiple incr() calls are possible. possible ways to handle:
     #
     # 1. release the lock in a 'finally' clause, but then risk missing a work
     #    package because acquiring the read lock will succeed even though the
     #    work package was not written yet.
     #
     # 2. allow the lock to get too high. the fan-in logic attempts to wait for 
     #    work packages across multiple-retry attempts, so this seems like the 
     #    best option. we basically trade a bit of latency in fan-in for reliability.
     #    
     rwlock.acquireWriteLock(index, nextEvent=nextEvent)
     
     # insert the work package, which is simply a serialized FSMContext
     workIndex = '%s-%d' % (taskNameBase, knuthHash(index))
     
     # on retry, we want to ensure we get the same work index for this task
     actualTaskName = self.__obj[constants.TASK_NAME_PARAM]
     indexKeyName = 'workIndex-' + '-'.join([str(i) for i in [actualTaskName, fork] if i]) or None
     semaphore = RunOnceSemaphore(indexKeyName, self)
     
     # check if the workIndex changed during retry
     semaphoreWritten = False
     if self.__obj[constants.RETRY_COUNT_PARAM] > 0:
         # see comment (A) in self._queueDispatchFanIn(...)
         time.sleep(constants.DATASTORE_ASYNCRONOUS_INDEX_WRITE_WAIT_TIME)
         payload = semaphore.readRunOnceSemaphore(payload=workIndex, transactional=False)
         if payload:
             semaphoreWritten = True
             if payload != workIndex:
                 self.logger.info("Work index changed from '%s' to '%s' on retry.", payload, workIndex)
                 workIndex = payload
     
     # update this here so it gets written down into the work package too
     self[constants.INDEX_PARAM] = index
             
     # write down two models, one actual work package, one idempotency package
     keyName = '-'.join([str(i) for i in [actualTaskName, fork] if i]) or None
     work = _FantasmFanIn(context=self, workIndex=workIndex, key_name=keyName)
     
     # close enough to idempotent, but could still write only one of the entities
     # FIXME: could be made faster using a bulk put, but this interface is cleaner
     if not semaphoreWritten:
         semaphore.writeRunOnceSemaphore(payload=workIndex, transactional=False)
     
     # put the work item
     db.put(work)
     
     # (A) now the datastore is asynchronously writing the indices, so the work package may
     #     not show up in a query for a period of time. there is a corresponding time.sleep()
     #     in the fan-in of self.mergeJoinDispatch(...) 
         
     # release the lock - memcache.decr()
     rwlock.releaseWriteLock(index)
         
     try:
         
         # insert a task to run in the future and process a bunch of work packages
         now = time.time()
         url = self.buildUrl(self.currentState, nextEvent)
         params = self.buildParams(self.currentState, nextEvent)
         task = Task(name='%s-%d' % (taskNameBase, index),
                     method=self.method,
                     url=url,
                     params=params,
                     eta=datetime.datetime.utcfromtimestamp(now) + datetime.timedelta(seconds=fanInPeriod),
                     headers=self.headers,
                     retry_options=retryOptions)
         self.Queue(name=queueName).add(task)
         return task
     
     except (TaskAlreadyExistsError, TombstonedTaskError):
         pass # Fan-in magic
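
The swallowed exceptions in the final try/except are the "fan-in magic": every writer that runs while the same index is current computes the same task name, so only the first Queue.add() succeeds and exactly one merge task drains the whole group of work packages. Distilled from the example above:

    from google.appengine.api.taskqueue import (Queue, Task,
                                                TaskAlreadyExistsError,
                                                TombstonedTaskError)

    # Distilled from _queueDispatchFanIn above: identical task names
    # deduplicate the merge task across all writers sharing one index.
    def queue_merge_task(taskNameBase, index, url, params, queueName):
        task = Task(name='%s-%d' % (taskNameBase, index), url=url, params=params)
        try:
            Queue(name=queueName).add(task)
        except (TaskAlreadyExistsError, TombstonedTaskError):
            pass  # another writer already queued this window's merge task
        return task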
Example #18
    def _queueDispatchFanIn(self,
                            nextEvent,
                            fanInPeriod=0,
                            retryOptions=None,
                            queueName=None):
        """ Queues a call to .dispatch(nextEvent) in the task queue, or saves the context to the 
        datastore for processing by the queued .dispatch(nextEvent)
        
        @param nextEvent: a string event 
        @param fanInPeriod: the period of time between fan in Tasks 
        @param queueName: the queue name to Queue into 
        @return: a taskqueue.Task instance which may or may not have been queued already
        """
        assert nextEvent is not None
        assert not self.get(
            constants.INDEX_PARAM)  # fan-in after fan-in is not allowed
        assert queueName

        # we pop this off here because we do not want the fan-out/continuation param as part of the
        # task name, otherwise we lose the fan-in - each fan-in gets one work unit.
        self.pop(constants.GEN_PARAM, None)
        fork = self.pop(constants.FORK_PARAM, None)

        taskNameBase = self.getTaskName(nextEvent, fanIn=True)
        rwlock = ReadWriteLock(taskNameBase, self)
        index = rwlock.currentIndex()

        # (***)
        #
        # grab the lock - memcache.incr()
        #
        # on Task retry, multiple incr() calls are possible. possible ways to handle:
        #
        # 1. release the lock in a 'finally' clause, but then risk missing a work
        #    package because acquiring the read lock will succeed even though the
        #    work package was not written yet.
        #
        # 2. allow the lock to get too high. the fan-in logic attempts to wait for
        #    work packages across multiple-retry attempts, so this seems like the
        #    best option. we basically trade a bit of latency in fan-in for reliability.
        #
        rwlock.acquireWriteLock(index, nextEvent=nextEvent)

        # insert the work package, which is simply a serialized FSMContext
        workIndex = '%s-%d' % (taskNameBase, knuthHash(index))

        # on retry, we want to ensure we get the same work index for this task
        actualTaskName = self.__obj[constants.TASK_NAME_PARAM]
        indexKeyName = 'workIndex-' + '-'.join(
            [str(i) for i in [actualTaskName, fork] if i]) or None
        semaphore = RunOnceSemaphore(indexKeyName, self)

        # check if the workIndex changed during retry
        semaphoreWritten = False
        if self.__obj[constants.RETRY_COUNT_PARAM] > 0:
            # see comment (A) in self._queueDispatchFanIn(...)
            time.sleep(constants.DATASTORE_ASYNCRONOUS_INDEX_WRITE_WAIT_TIME)
            payload = semaphore.readRunOnceSemaphore(payload=workIndex,
                                                     transactional=False)
            if payload:
                semaphoreWritten = True
                if payload != workIndex:
                    self.logger.info(
                        "Work index changed from '%s' to '%s' on retry.",
                        payload, workIndex)
                    workIndex = payload

        # write down two models, one actual work package, one idempotency package
        keyName = '-'.join([str(i)
                            for i in [actualTaskName, fork] if i]) or None
        work = _FantasmFanIn(context=self,
                             workIndex=workIndex,
                             key_name=keyName)

        # close enough to idempotent, but could still write only one of the entities
        # FIXME: could be made faster using a bulk put, but this interface is cleaner
        if not semaphoreWritten:
            semaphore.writeRunOnceSemaphore(payload=workIndex,
                                            transactional=False)

        # put the work item
        db.put(work)

        # (A) now the datastore is asynchronously writing the indices, so the work package may
        #     not show up in a query for a period of time. there is a corresponding time.sleep()
        #     in the fan-in of self.mergeJoinDispatch(...)

        # release the lock - memcache.decr()
        rwlock.releaseWriteLock(index)

        try:

            # insert a task to run in the future and process a bunch of work packages
            now = time.time()
            self[constants.INDEX_PARAM] = index
            url = self.buildUrl(self.currentState, nextEvent)
            params = self.buildParams(self.currentState, nextEvent)
            task = Task(name='%s-%d' % (taskNameBase, index),
                        method=self.method,
                        url=url,
                        params=params,
                        eta=datetime.datetime.utcfromtimestamp(now) +
                        datetime.timedelta(seconds=fanInPeriod),
                        headers=self.headers,
                        retry_options=retryOptions)
            self.Queue(name=queueName).add(task)
            return task

        except (TaskAlreadyExistsError, TombstonedTaskError):
            pass  # Fan-in magic