def test_mergeJoinDispatch_1234_contexts(self):
    for i in xrange(1234):
        _FantasmFanIn(workIndex='instanceName--foo--event--foo2--step-0-2654435761').put()
    self.assertEqual(1000, _FantasmFanIn.all().count())  # can't get them all with .count()
    contexts = self.context.mergeJoinDispatch('event', {RETRY_COUNT_PARAM: 0})
    self.assertEqual(1234, len(contexts))
    self.assertEqual(1000, _FantasmFanIn.all().count())
def test_mergeJoinDispatch_1_context(self):
    _FantasmFanIn(workIndex='instanceName--foo--event--foo2--step-0-2654435761').put()
    self.assertEqual(1, _FantasmFanIn.all().count())
    contexts = self.context.mergeJoinDispatch('event', {RETRY_COUNT_PARAM: 0})
    self.assertEqual([{'__ix__': 1, '__step__': 0}], contexts)
    self.assertEqual(1, _FantasmFanIn.all().count())
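# The trailing 2654435761 in the hard-coded workIndex strings above lines up with a Knuth-style
# multiplicative hash of write-lock index 1 (2654435761 is the constant popularized by Knuth,
# close to 2**32 divided by the golden ratio). A minimal sketch of such a hash follows; it is an
# assumed stand-in, not necessarily the knuthHash used by _queueDispatchFanIn below.
def knuthHashSketch(number, bits=32):
    """ Multiplicative hash that spreads consecutive lock indices across the 32-bit range. """
    return (number * 2654435761) % (2 ** bits)

# index 1 maps to the constant itself, matching the '...step-0-2654435761' suffix in the tests above
assert knuthHashSketch(1) == 2654435761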
def test_datetime_list(self):
    model = _FantasmFanIn()
    nows = [datetime.datetime.now(), datetime.datetime.now()]
    model.context = {'a': nows}
    model.put()
    model = db.get(model.key())
    self.assertEqual({'a': nows}, model.context)
def test_datetime(self):
    model = _FantasmFanIn()
    now = datetime.datetime.now()
    model.context = {'a': now}
    model.put()
    model = db.get(model.key())
    self.assertEqual({'a': now}, model.context)
def test_db_Key_list(self):
    model = _FantasmFanIn()
    model.context = {'a': [self.testModel.key()]}
    model.put()
    model = db.get(model.key())
    self.assertEqual({'a': [self.testModel.key()]}, model.context)
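# The three tests above check that datetime and db.Key values (including lists of them) survive a
# put()/get() round trip through _FantasmFanIn.context. A minimal sketch of one way such a property
# could be built on the old db API follows; it is a hypothetical pickle-backed property, not
# necessarily how fantasm actually serializes the context.
import pickle

from google.appengine.ext import db


class PickledContextProperty(db.Property):
    """ Stores an arbitrary python object (dicts of datetimes, db.Keys, lists, ...) as a
    pickled Blob so it round-trips unchanged through the datastore. """
    data_type = db.Blob

    def get_value_for_datastore(self, model_instance):
        value = super(PickledContextProperty, self).get_value_for_datastore(model_instance)
        return db.Blob(pickle.dumps(value, pickle.HIGHEST_PROTOCOL)) if value is not None else None

    def make_value_from_datastore(self, value):
        return pickle.loads(str(value)) if value is not None else None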
def _queueDispatchFanIn(self, nextEvent, fanInPeriod=0, retryOptions=None, queueName=None):
    """ Queues a call to .dispatch(nextEvent) in the task queue, or saves the context to the
    datastore for processing by the queued .dispatch(nextEvent)

    @param nextEvent: a string event
    @param fanInPeriod: the period of time between fan-in Tasks
    @param retryOptions: the TaskRetryOptions for the queued Task
    @param queueName: the queue name to Queue into
    @return: a taskqueue.Task instance which may or may not have been queued already
    """
    assert nextEvent is not None
    assert not self.get(constants.INDEX_PARAM)  # fan-in after fan-in is not allowed
    assert queueName

    # we pop this off here because we do not want the fan-out/continuation param as part of the
    # task name, otherwise we lose the fan-in - each fan-in gets one work unit.
    self.pop(constants.GEN_PARAM, None)
    fork = self.pop(constants.FORK_PARAM, None)

    # transfer the fan-in-group into the context (under a fixed-value key) so that states beyond
    # the fan-in get unique Task names
    # FIXME: this will likely change once we formalize what to do post fan-in
    transition = self.currentState.getTransition(nextEvent)
    if self.get(transition.target.fanInGroup) is not None:
        self[constants.FAN_IN_GROUP_PARAM] = self[transition.target.fanInGroup]

    taskNameBase = self.getTaskName(nextEvent, fanIn=True)
    rwlock = ReadWriteLock(taskNameBase, self)
    index = rwlock.currentIndex()

    # (***)
    #
    # grab the lock - memcache.incr()
    #
    # on Task retry, multiple incr() calls are possible. possible ways to handle:
    #
    # 1. release the lock in a 'finally' clause, but then risk missing a work
    #    package because acquiring the read lock will succeed even though the
    #    work package was not written yet.
    #
    # 2. allow the lock to get too high. the fan-in logic attempts to wait for
    #    work packages across multiple retry attempts, so this seems like the
    #    best option. we basically trade a bit of latency in fan-in for reliability.
    #
    rwlock.acquireWriteLock(index, nextEvent=nextEvent)

    # insert the work package, which is simply a serialized FSMContext
    workIndex = '%s-%d' % (taskNameBase, knuthHash(index))

    # on retry, we want to ensure we get the same work index for this task
    actualTaskName = self.__obj[constants.TASK_NAME_PARAM]
    indexKeyName = 'workIndex-' + '-'.join([str(i) for i in [actualTaskName, fork] if i]) or None
    semaphore = RunOnceSemaphore(indexKeyName, self)

    # check if the workIndex changed during retry
    semaphoreWritten = False
    if self.__obj[constants.RETRY_COUNT_PARAM] > 0:
        # see comment (A) in self._queueDispatchFanIn(...)
        time.sleep(constants.DATASTORE_ASYNCRONOUS_INDEX_WRITE_WAIT_TIME)

        payload = semaphore.readRunOnceSemaphore(payload=workIndex, transactional=False)
        if payload:
            semaphoreWritten = True
            if payload != workIndex:
                self.logger.info("Work index changed from '%s' to '%s' on retry.", payload, workIndex)
                workIndex = payload

    # update this here so it gets written down into the work package too
    self[constants.INDEX_PARAM] = index

    # write down two models, one actual work package, one idempotency package
    keyName = '-'.join([str(i) for i in [actualTaskName, fork] if i]) or None
    work = _FantasmFanIn(context=self, workIndex=workIndex, key_name=keyName)

    # close enough to idempotent, but could still write only one of the entities
    # FIXME: could be made faster using a bulk put, but this interface is cleaner
    if not semaphoreWritten:
        semaphore.writeRunOnceSemaphore(payload=workIndex, transactional=False)

    # put the work item
    db.put(work)

    # (A) now the datastore is asynchronously writing the indices, so the work package may
    #     not show up in a query for a period of time. there is a corresponding time.sleep()
    #     in the fan-in of self.mergeJoinDispatch(...)

    # release the lock - memcache.decr()
    rwlock.releaseWriteLock(index)

    try:
        # insert a task to run in the future and process a bunch of work packages
        now = time.time()
        url = self.buildUrl(self.currentState, nextEvent)
        params = self.buildParams(self.currentState, nextEvent)
        task = Task(name='%s-%d' % (taskNameBase, index),
                    method=self.method,
                    url=url,
                    params=params,
                    eta=datetime.datetime.utcfromtimestamp(now) + datetime.timedelta(seconds=fanInPeriod),
                    headers=self.headers,
                    retry_options=retryOptions)
        self.Queue(name=queueName).add(task)
        return task
    except (TaskAlreadyExistsError, TombstonedTaskError):
        pass  # Fan-in magic
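# Rough sketch of the read side implied by comments (***) and (A) above: the fan-in Task acquires
# the read lock for the index, waits out the asynchronous datastore index writes, then queries the
# work packages written under the same knuthHash'd workIndex. Helper names (acquireReadLock in
# particular) are assumptions; this is not fantasm's actual mergeJoinDispatch implementation.
def mergeJoinReadSketch(context, taskNameBase, index):
    rwlock = ReadWriteLock(taskNameBase, context)
    rwlock.acquireReadLock(index)  # assumed API: block until the memcache writer count drains
    # see comment (A): give the datastore time to finish writing the workIndex index entries
    time.sleep(constants.DATASTORE_ASYNCRONOUS_INDEX_WRITE_WAIT_TIME)
    workIndex = '%s-%d' % (taskNameBase, knuthHash(index))
    # fetch every serialized FSMContext written under this work index
    return _FantasmFanIn.all().filter('workIndex =', workIndex).fetch(1000)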
def _queueDispatchFanIn(self, nextEvent, fanInPeriod=0, retryOptions=None, queueName=None):
    """ Queues a call to .dispatch(nextEvent) in the task queue, or saves the context to the
    datastore for processing by the queued .dispatch(nextEvent)

    @param nextEvent: a string event
    @param fanInPeriod: the period of time between fan-in Tasks
    @param retryOptions: the TaskRetryOptions for the queued Task
    @param queueName: the queue name to Queue into
    @return: a taskqueue.Task instance which may or may not have been queued already
    """
    assert nextEvent is not None
    assert not self.get(constants.INDEX_PARAM)  # fan-in after fan-in is not allowed
    assert queueName

    # we pop this off here because we do not want the fan-out/continuation param as part of the
    # task name, otherwise we lose the fan-in - each fan-in gets one work unit.
    self.pop(constants.GEN_PARAM, None)
    fork = self.pop(constants.FORK_PARAM, None)

    taskNameBase = self.getTaskName(nextEvent, fanIn=True)
    rwlock = ReadWriteLock(taskNameBase, self)
    index = rwlock.currentIndex()

    # (***)
    #
    # grab the lock - memcache.incr()
    #
    # on Task retry, multiple incr() calls are possible. possible ways to handle:
    #
    # 1. release the lock in a 'finally' clause, but then risk missing a work
    #    package because acquiring the read lock will succeed even though the
    #    work package was not written yet.
    #
    # 2. allow the lock to get too high. the fan-in logic attempts to wait for
    #    work packages across multiple retry attempts, so this seems like the
    #    best option. we basically trade a bit of latency in fan-in for reliability.
    #
    rwlock.acquireWriteLock(index, nextEvent=nextEvent)

    # insert the work package, which is simply a serialized FSMContext
    workIndex = '%s-%d' % (taskNameBase, knuthHash(index))

    # on retry, we want to ensure we get the same work index for this task
    actualTaskName = self.__obj[constants.TASK_NAME_PARAM]
    indexKeyName = 'workIndex-' + '-'.join([str(i) for i in [actualTaskName, fork] if i]) or None
    semaphore = RunOnceSemaphore(indexKeyName, self)

    # check if the workIndex changed during retry
    semaphoreWritten = False
    if self.__obj[constants.RETRY_COUNT_PARAM] > 0:
        # see comment (A) in self._queueDispatchFanIn(...)
        time.sleep(constants.DATASTORE_ASYNCRONOUS_INDEX_WRITE_WAIT_TIME)

        payload = semaphore.readRunOnceSemaphore(payload=workIndex, transactional=False)
        if payload:
            semaphoreWritten = True
            if payload != workIndex:
                self.logger.info("Work index changed from '%s' to '%s' on retry.", payload, workIndex)
                workIndex = payload

    # write down two models, one actual work package, one idempotency package
    keyName = '-'.join([str(i) for i in [actualTaskName, fork] if i]) or None
    work = _FantasmFanIn(context=self, workIndex=workIndex, key_name=keyName)

    # close enough to idempotent, but could still write only one of the entities
    # FIXME: could be made faster using a bulk put, but this interface is cleaner
    if not semaphoreWritten:
        semaphore.writeRunOnceSemaphore(payload=workIndex, transactional=False)

    # put the work item
    db.put(work)

    # (A) now the datastore is asynchronously writing the indices, so the work package may
    #     not show up in a query for a period of time. there is a corresponding time.sleep()
    #     in the fan-in of self.mergeJoinDispatch(...)

    # release the lock - memcache.decr()
    rwlock.releaseWriteLock(index)

    try:
        # insert a task to run in the future and process a bunch of work packages
        now = time.time()
        self[constants.INDEX_PARAM] = index
        url = self.buildUrl(self.currentState, nextEvent)
        params = self.buildParams(self.currentState, nextEvent)
        task = Task(name='%s-%d' % (taskNameBase, index),
                    method=self.method,
                    url=url,
                    params=params,
                    eta=datetime.datetime.utcfromtimestamp(now) + datetime.timedelta(seconds=fanInPeriod),
                    headers=self.headers,
                    retry_options=retryOptions)
        self.Queue(name=queueName).add(task)
        return task
    except (TaskAlreadyExistsError, TombstonedTaskError):
        pass  # Fan-in magic
def _queueDispatchFanIn(self, nextEvent, fanInPeriod=0, queueName=None):
    """ Queues a call to .dispatch(nextEvent) in the task queue, or saves the context to the
    datastore for processing by the queued .dispatch(nextEvent)

    @param nextEvent: a string event
    @param fanInPeriod: the period of time between fan-in Tasks
    @param queueName: the queue name to Queue into
    @return: a taskqueue.Task instance which may or may not have been queued already
    """
    assert nextEvent is not None
    assert not self.get(constants.INDEX_PARAM)  # fan-in after fan-in is not allowed
    assert queueName

    # we pop this off here because we do not want the fan-out/continuation param as part of the
    # task name, otherwise we lose the fan-in - each fan-in gets one work unit.
    self.pop(constants.GEN_PARAM, None)
    self.pop(constants.FORK_PARAM, None)

    taskNameBase = self.getTaskName(nextEvent, fanIn=True)

    index = memcache.get('index-' + taskNameBase)
    if index is None:
        # using 'random.randint' here instead of '1' helps when the index is ejected from memcache:
        # instead of restarting at the same counter, we jump (likely) far away from existing task
        # job names.
        memcache.add('index-' + taskNameBase, random.randint(1, 2**32))
        index = memcache.get('index-' + taskNameBase)

    # grab the lock
    lock = '%s-lock-%d' % (taskNameBase, index)
    writers = memcache.incr(lock, initial_value=2**16)
    if writers < 2**16:
        memcache.decr(lock)
        # this will escape as a 500 error and the Task will be re-tried by appengine
        raise FanInWriteLockFailureRuntimeError(nextEvent,
                                                self.machineName,
                                                self.currentState.name,
                                                self.instanceName)

    # insert the work package, which is simply a serialized FSMContext
    workIndex = '%s-%d' % (taskNameBase, knuthHash(index))
    work = _FantasmFanIn(context=self, workIndex=workIndex)
    work.put()

    # insert a task to run in the future and process a bunch of work packages
    now = time.time()
    try:
        self[constants.INDEX_PARAM] = index
        url = self.buildUrl(self.currentState, nextEvent)
        params = self.buildParams(self.currentState, nextEvent)
        # int(now / (fanInPeriod - 1 + 30)) is included because it was in [2], but is less needed
        # now that we use random.randint in seeding memcache. for long fan-in periods, and the
        # case where random.randint hits the same value twice, this may cause problems for up to
        # fanInPeriod + 30s.
        # see: http://www.mail-archive.com/[email protected]/msg30408.html
        task = Task(name='%s-%d-%d' % (taskNameBase, int(now / (fanInPeriod - 1 + 30)), index),
                    method=self.method,
                    url=url,
                    params=params,
                    eta=datetime.datetime.utcfromtimestamp(now) + datetime.timedelta(seconds=fanInPeriod),
                    headers=self.headers)
        self.Queue(name=queueName).add(task)
        return task
    except (TaskAlreadyExistsError, TombstonedTaskError):
        pass  # Fan-in magic
    finally:
        memcache.decr(lock)
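# Hypothetical reader-side counterpart to the memcache write lock above (not fantasm's actual
# code): before reading the work packages, the fan-in Task could poll the lock counter until every
# writer has called memcache.decr(), i.e. the counter has drained back to the 2**16 baseline.
def waitForWritersSketch(taskNameBase, index, timeout=10.0, poll=0.1):
    lock = '%s-lock-%d' % (taskNameBase, index)
    deadline = time.time() + timeout
    while time.time() < deadline:
        writers = memcache.get(lock)
        if writers is None or int(writers) <= 2**16:
            return True  # no in-flight writers; safe to query the _FantasmFanIn work packages
        time.sleep(poll)
    return False  # timed out; callers may proceed and pick up stragglers on a later pass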