Example #1
0
 def test_mergeJoinDispatch_1234_contexts(self):
     # store 1234 fan-in entities that all share one work index
     sharedWorkIndex = 'instanceName--foo--event--foo2--step-0-2654435761'
     for _ in xrange(1234):
         entity = _FantasmFanIn(workIndex=sharedWorkIndex)
         entity.put()

     # can't get them all with .count(), so only 1000 are reported
     storedBefore = _FantasmFanIn.all().count()
     self.assertEqual(1000, storedBefore)

     # the merge join must still hand back one context per stored entity
     mergedContexts = self.context.mergeJoinDispatch('event', {RETRY_COUNT_PARAM: 0})
     self.assertEqual(1234, len(mergedContexts))

     # the reported entity count is the same after the dispatch
     storedAfter = _FantasmFanIn.all().count()
     self.assertEqual(1000, storedAfter)
Example #2
0
 def test_mergeJoinDispatch_1_context(self):
     # store exactly one fan-in entity
     entity = _FantasmFanIn(workIndex='instanceName--foo--event--foo2--step-0-2654435761')
     entity.put()
     self.assertEqual(1, _FantasmFanIn.all().count())

     # dispatching with a zero retry count should yield a single context
     mergedContexts = self.context.mergeJoinDispatch('event', {RETRY_COUNT_PARAM: 0})
     expectedContexts = [{'__ix__': 1, '__step__': 0}]
     self.assertEqual(expectedContexts, mergedContexts)

     # the entity count is unchanged after the dispatch
     self.assertEqual(1, _FantasmFanIn.all().count())
Example #3
0
 def test_mergeJoinDispatch_1234_contexts(self):
     # every entity written below carries the same work index
     workIndex = 'instanceName--foo--event--foo2--step-0-2654435761'
     for _ in xrange(1234):
         _FantasmFanIn(workIndex=workIndex).put()

     # can't get them all with .count()
     countBeforeDispatch = _FantasmFanIn.all().count()
     self.assertEqual(1000, countBeforeDispatch)

     # one context is expected back for each of the 1234 entities
     contexts = self.context.mergeJoinDispatch('event', {RETRY_COUNT_PARAM: 0})
     numContexts = len(contexts)
     self.assertEqual(1234, numContexts)

     # .count() reports the same value after the dispatch
     countAfterDispatch = _FantasmFanIn.all().count()
     self.assertEqual(1000, countAfterDispatch)
Example #4
0
 def test_datetime_list(self):
     # a list of datetimes stored in .context should survive a put()/get() round trip
     stored = _FantasmFanIn()
     timestamps = [datetime.datetime.now(), datetime.datetime.now()]
     stored.context = {'a': timestamps}
     stored.put()

     # re-read the entity by key and compare the context
     fetched = db.get(stored.key())
     expectedContext = {'a': timestamps}
     self.assertEqual(expectedContext, fetched.context)
Example #5
0
 def test_datetime(self):
     # a single datetime stored in .context should survive a put()/get() round trip
     stored = _FantasmFanIn()
     timestamp = datetime.datetime.now()
     stored.context = {'a': timestamp}
     stored.put()

     # re-read the entity by key and compare the context
     fetched = db.get(stored.key())
     expectedContext = {'a': timestamp}
     self.assertEqual(expectedContext, fetched.context)
Example #6
0
 def test_datetime_list(self):
     # write an entity whose context holds a list of two datetimes
     entity = _FantasmFanIn()
     moments = [datetime.datetime.now(), datetime.datetime.now()]
     entity.context = {'a': moments}
     entity.put()

     # the context read back by key must equal what was written
     reloaded = db.get(entity.key())
     self.assertEqual({'a': moments}, reloaded.context)
Example #7
0
 def test_datetime(self):
     # write an entity whose context holds a single datetime
     entity = _FantasmFanIn()
     moment = datetime.datetime.now()
     entity.context = {'a': moment}
     entity.put()

     # the context read back by key must equal what was written
     reloaded = db.get(entity.key())
     self.assertEqual({'a': moment}, reloaded.context)
Example #8
0
 def test_db_Key_list(self):
     # a list containing a datastore key should survive a put()/get() round trip
     stored = _FantasmFanIn()
     stored.context = {'a': [self.testModel.key()]}
     stored.put()

     # re-read the entity by key and compare against a freshly built expectation
     fetched = db.get(stored.key())
     expectedContext = {'a': [self.testModel.key()]}
     self.assertEqual(expectedContext, fetched.context)
Example #9
0
File: fsm.py Project: iki/fantasm
 def _queueDispatchFanIn(self, nextEvent, fanInPeriod=0, retryOptions=None, queueName=None):
     """ Queues a call to .dispatch(nextEvent) in the task queue, or saves the context to the
     datastore for processing by the queued .dispatch(nextEvent)

     The context is always written to the datastore as a _FantasmFanIn work package (while holding
     the write lock); afterwards a Task named after the current fan-in index is added to the queue.

     @param nextEvent: a string event
     @param fanInPeriod: the period of time between fan in Tasks; added (in seconds) to the
                         current time to compute the Task eta
     @param retryOptions: passed through to the Task as its retry_options
     @param queueName: the queue name to Queue into (required, asserted non-empty)
     @return: the taskqueue.Task instance that was added to the queue, or None if a Task with
              the same name already exists or has been tombstoned
     """
     assert nextEvent is not None
     assert not self.get(constants.INDEX_PARAM) # fan-in after fan-in is not allowed
     assert queueName
     
     # we pop this off here because we do not want the fan-out/continuation param as part of the
     # task name, otherwise we lose the fan-in - each fan-in gets one work unit.
     self.pop(constants.GEN_PARAM, None)
     fork = self.pop(constants.FORK_PARAM, None)
     
     # transfer the fan-in-group into the context (under a fixed value key) so that states beyond 
     # the fan-in get unique Task names
     # FIXME: this will likely change once we formalize what to do post fan-in
     transition = self.currentState.getTransition(nextEvent)
     if self.get(transition.target.fanInGroup) is not None:
         self[constants.FAN_IN_GROUP_PARAM] = self[transition.target.fanInGroup]
     
     taskNameBase = self.getTaskName(nextEvent, fanIn=True)
     rwlock = ReadWriteLock(taskNameBase, self)
     index = rwlock.currentIndex()
         
     # (***)
     #
     # grab the lock - memcache.incr()
     # 
     # on Task retry, multiple incr() calls are possible. possible ways to handle:
     #
     # 1. release the lock in a 'finally' clause, but then risk missing a work
     #    package because acquiring the read lock will succeed even though the
     #    work package was not written yet.
     #
     # 2. allow the lock to get too high. the fan-in logic attempts to wait for 
     #    work packages across multiple-retry attempts, so this seems like the 
     #    best option. we basically trade a bit of latency in fan-in for reliability.
     #    
     rwlock.acquireWriteLock(index, nextEvent=nextEvent)
     
     # insert the work package, which is simply a serialized FSMContext
     workIndex = '%s-%d' % (taskNameBase, knuthHash(index))
     
     # on retry, we want to ensure we get the same work index for this task
     actualTaskName = self.__obj[constants.TASK_NAME_PARAM]
     # NOTE(review): '+' binds tighter than 'or', so the trailing 'or None' can never take effect -
     # the left operand always starts with 'workIndex-' and is therefore truthy.
     indexKeyName = 'workIndex-' + '-'.join([str(i) for i in [actualTaskName, fork] if i]) or None
     semaphore = RunOnceSemaphore(indexKeyName, self)
     
     # check if the workIndex changed during retry
     semaphoreWritten = False
     if self.__obj[constants.RETRY_COUNT_PARAM] > 0:
         # see comment (A) further down in this method
         time.sleep(constants.DATASTORE_ASYNCRONOUS_INDEX_WRITE_WAIT_TIME)
         payload = semaphore.readRunOnceSemaphore(payload=workIndex, transactional=False)
         if payload:
             semaphoreWritten = True
             if payload != workIndex:
                 # keep using the work index recorded by the earlier attempt
                 self.logger.info("Work index changed from '%s' to '%s' on retry.", payload, workIndex)
                 workIndex = payload
     
     # update this here so it gets written down into the work package too
     self[constants.INDEX_PARAM] = index
             
     # write down two models, one actual work package, one idempotency package
     # (keyName is None when both actualTaskName and fork are falsy)
     keyName = '-'.join([str(i) for i in [actualTaskName, fork] if i]) or None
     work = _FantasmFanIn(context=self, workIndex=workIndex, key_name=keyName)
     
     # close enough to idempotent, but could still write only one of the entities
     # FIXME: could be made faster using a bulk put, but this interface is cleaner
     if not semaphoreWritten:
         semaphore.writeRunOnceSemaphore(payload=workIndex, transactional=False)
     
     # put the work item
     db.put(work)
     
     # (A) now the datastore is asynchronously writing the indices, so the work package may
     #     not show up in a query for a period of time. there is a corresponding time.sleep()
     #     in the fan-in of self.mergeJoinDispatch(...) 
         
     # release the lock - memcache.decr()
     rwlock.releaseWriteLock(index)
         
     try:
         
         # insert a task to run in the future and process a bunch of work packages
         now = time.time()
         url = self.buildUrl(self.currentState, nextEvent)
         params = self.buildParams(self.currentState, nextEvent)
         # the Task name is derived from the fan-in index, so every context that shares this
         # index tries to add a Task with the same name
         task = Task(name='%s-%d' % (taskNameBase, index),
                     method=self.method,
                     url=url,
                     params=params,
                     eta=datetime.datetime.utcfromtimestamp(now) + datetime.timedelta(seconds=fanInPeriod),
                     headers=self.headers,
                     retry_options=retryOptions)
         self.Queue(name=queueName).add(task)
         return task
     
     except (TaskAlreadyExistsError, TombstonedTaskError):
         # a Task with this name was already added; falls through and returns None
         pass # Fan-in magic
Example #10
0
    def _queueDispatchFanIn(self,
                            nextEvent,
                            fanInPeriod=0,
                            retryOptions=None,
                            queueName=None):
        """ Queues a call to .dispatch(nextEvent) in the task queue, or saves the context to the
        datastore for processing by the queued .dispatch(nextEvent)

        The context is always written to the datastore as a _FantasmFanIn work package (while
        holding the write lock); afterwards a Task named after the current fan-in index is added
        to the queue.

        @param nextEvent: a string event
        @param fanInPeriod: the period of time between fan in Tasks; added (in seconds) to the
                            current time to compute the Task eta
        @param retryOptions: passed through to the Task as its retry_options
        @param queueName: the queue name to Queue into (required, asserted non-empty)
        @return: the taskqueue.Task instance that was added to the queue, or None if a Task with
                 the same name already exists or has been tombstoned
        """
        assert nextEvent is not None
        assert not self.get(
            constants.INDEX_PARAM)  # fan-in after fan-in is not allowed
        assert queueName

        # we pop this off here because we do not want the fan-out/continuation param as part of the
        # task name, otherwise we lose the fan-in - each fan-in gets one work unit.
        self.pop(constants.GEN_PARAM, None)
        fork = self.pop(constants.FORK_PARAM, None)

        taskNameBase = self.getTaskName(nextEvent, fanIn=True)
        rwlock = ReadWriteLock(taskNameBase, self)
        index = rwlock.currentIndex()

        # (***)
        #
        # grab the lock - memcache.incr()
        #
        # on Task retry, multiple incr() calls are possible. possible ways to handle:
        #
        # 1. release the lock in a 'finally' clause, but then risk missing a work
        #    package because acquiring the read lock will succeed even though the
        #    work package was not written yet.
        #
        # 2. allow the lock to get too high. the fan-in logic attempts to wait for
        #    work packages across multiple-retry attempts, so this seems like the
        #    best option. we basically trade a bit of latency in fan-in for reliability.
        #
        rwlock.acquireWriteLock(index, nextEvent=nextEvent)

        # insert the work package, which is simply a serialized FSMContext
        workIndex = '%s-%d' % (taskNameBase, knuthHash(index))

        # on retry, we want to ensure we get the same work index for this task
        actualTaskName = self.__obj[constants.TASK_NAME_PARAM]
        # NOTE(review): '+' binds tighter than 'or', so the trailing 'or None' can never take
        # effect - the left operand always starts with 'workIndex-' and is therefore truthy.
        indexKeyName = 'workIndex-' + '-'.join(
            [str(i) for i in [actualTaskName, fork] if i]) or None
        semaphore = RunOnceSemaphore(indexKeyName, self)

        # check if the workIndex changed during retry
        semaphoreWritten = False
        if self.__obj[constants.RETRY_COUNT_PARAM] > 0:
            # see comment (A) further down in this method
            time.sleep(constants.DATASTORE_ASYNCRONOUS_INDEX_WRITE_WAIT_TIME)
            payload = semaphore.readRunOnceSemaphore(payload=workIndex,
                                                     transactional=False)
            if payload:
                semaphoreWritten = True
                if payload != workIndex:
                    # keep using the work index recorded by the earlier attempt
                    self.logger.info(
                        "Work index changed from '%s' to '%s' on retry.",
                        payload, workIndex)
                    workIndex = payload

        # write down two models, one actual work package, one idempotency package
        # (keyName is None when both actualTaskName and fork are falsy)
        keyName = '-'.join([str(i)
                            for i in [actualTaskName, fork] if i]) or None
        work = _FantasmFanIn(context=self,
                             workIndex=workIndex,
                             key_name=keyName)

        # close enough to idempotent, but could still write only one of the entities
        # FIXME: could be made faster using a bulk put, but this interface is cleaner
        if not semaphoreWritten:
            semaphore.writeRunOnceSemaphore(payload=workIndex,
                                            transactional=False)

        # put the work item
        db.put(work)

        # (A) now the datastore is asynchronously writing the indices, so the work package may
        #     not show up in a query for a period of time. there is a corresponding time.sleep()
        #     in the fan-in of self.mergeJoinDispatch(...)

        # release the lock - memcache.decr()
        rwlock.releaseWriteLock(index)

        try:

            # insert a task to run in the future and process a bunch of work packages
            now = time.time()
            # the index is only set on the context here, i.e. after the work package was put;
            # presumably so that buildParams() includes it in the Task params - TODO confirm
            self[constants.INDEX_PARAM] = index
            url = self.buildUrl(self.currentState, nextEvent)
            params = self.buildParams(self.currentState, nextEvent)
            # the Task name is derived from the fan-in index, so every context that shares this
            # index tries to add a Task with the same name
            task = Task(name='%s-%d' % (taskNameBase, index),
                        method=self.method,
                        url=url,
                        params=params,
                        eta=datetime.datetime.utcfromtimestamp(now) +
                        datetime.timedelta(seconds=fanInPeriod),
                        headers=self.headers,
                        retry_options=retryOptions)
            self.Queue(name=queueName).add(task)
            return task

        except (TaskAlreadyExistsError, TombstonedTaskError):
            # a Task with this name was already added; falls through and returns None
            pass  # Fan-in magic
Example #11
0
 def test_mergeJoinDispatch_1_context(self):
     # write a single fan-in entity and confirm it is the only one stored
     loneEntity = _FantasmFanIn(workIndex='instanceName--foo--event--foo2--step-0-2654435761')
     loneEntity.put()
     self.assertEqual(1, _FantasmFanIn.all().count())

     # the merge join should produce exactly one context for it
     result = self.context.mergeJoinDispatch('event', {RETRY_COUNT_PARAM: 0})
     self.assertEqual([{'__ix__': 1, '__step__': 0}], result)

     # the stored entity count is the same after the dispatch
     remaining = _FantasmFanIn.all().count()
     self.assertEqual(1, remaining)
Example #12
0
 def test_db_Key_list(self):
     # write an entity whose context holds a list with one datastore key
     entity = _FantasmFanIn()
     entity.context = {'a': [self.testModel.key()]}
     entity.put()

     # the context read back by key must equal a freshly built copy of what was written
     reloaded = db.get(entity.key())
     self.assertEqual({'a': [self.testModel.key()]}, reloaded.context)
Example #13
0
 def _queueDispatchFanIn(self, nextEvent, fanInPeriod=0, queueName=None):
     """ Queues a call to .dispatch(nextEvent) in the task queue, or saves the context to the
     datastore for processing by the queued .dispatch(nextEvent)

     The context is written to the datastore as a _FantasmFanIn work package and a Task is then
     added to the queue. Both steps run while holding a memcache-based write lock, which is
     released on every exit path once it has been acquired.

     @param nextEvent: a string event
     @param fanInPeriod: the period of time between fan in Tasks; added (in seconds) to the
                         current time to compute the Task eta
     @param queueName: the queue name to Queue into (required, asserted non-empty)
     @return: the taskqueue.Task instance that was added to the queue, or None if a Task with
              the same name already exists or has been tombstoned
     @raise FanInWriteLockFailureRuntimeError: if the write lock counter is below its initial
              value, i.e. the write lock could not be acquired
     """
     assert nextEvent is not None
     assert not self.get(constants.INDEX_PARAM) # fan-in after fan-in is not allowed
     assert queueName

     # we pop this off here because we do not want the fan-out/continuation param as part of the
     # task name, otherwise we lose the fan-in - each fan-in gets one work unit.
     self.pop(constants.GEN_PARAM, None)
     self.pop(constants.FORK_PARAM, None)

     taskNameBase = self.getTaskName(nextEvent, fanIn=True)
     index = memcache.get('index-' + taskNameBase)
     if index is None:
         # using 'random.randint' here instead of '1' helps when the index is ejected from memcache
         # instead of restarting at the same counter, we jump (likely) far away from existing task job
         # names.
         memcache.add('index-' + taskNameBase, random.randint(1, 2**32))
         index = memcache.get('index-' + taskNameBase)

     # grab the lock
     lock = '%s-lock-%d' % (taskNameBase, index)
     writers = memcache.incr(lock, initial_value=2**16)
     if writers < 2**16:
         memcache.decr(lock)
         # this will escape as a 500 error and the Task will be re-tried by appengine
         raise FanInWriteLockFailureRuntimeError(nextEvent,
                                                 self.machineName,
                                                 self.currentState.name,
                                                 self.instanceName)

     # from here on the write lock is held. the datastore write is inside the try/finally too,
     # so that a failing put() does not leave the lock counter incremented forever.
     try:
         # insert the work package, which is simply a serialized FSMContext
         workIndex = '%s-%d' % (taskNameBase, knuthHash(index))
         work = _FantasmFanIn(context=self, workIndex=workIndex)
         work.put()

         # insert a task to run in the future and process a bunch of work packages
         now = time.time()
         try:
             self[constants.INDEX_PARAM] = index
             url = self.buildUrl(self.currentState, nextEvent)
             params = self.buildParams(self.currentState, nextEvent)
             # int(now / (fanInPeriod - 1 + 30)) included because it was in [2], but is less needed now that
             # we use random.randint in seeding memcache. for long fan in periods, and the case where random.randint
             # hits the same value twice, this may cause problems for up to fanInPeriod + 30s.
             # see: http://www.mail-archive.com/[email protected]/msg30408.html
             task = Task(name='%s-%d-%d' % (taskNameBase, int(now / (fanInPeriod - 1 + 30)), index),
                         method=self.method,
                         url=url,
                         params=params,
                         eta=datetime.datetime.utcfromtimestamp(now) + datetime.timedelta(seconds=fanInPeriod),
                         headers=self.headers)
             self.Queue(name=queueName).add(task)
             return task
         except (TaskAlreadyExistsError, TombstonedTaskError):
             # a Task with this name was already added; falls through and returns None
             pass # Fan-in magic
     finally:
         # release the lock
         memcache.decr(lock)