def setUp(self):
    """ Builds the 'FanInMachine' fixture and dispatches 'pseudo-init' once, then
    checks that exactly one fan-in work package was written. """
    super(FanInMergeJoinDispatchTest, self).setUp()
    setUpByString(self, FAN_IN_MACHINE, machineName='FanInMachine', instanceName='foo')
    mock('config.currentConfiguration', returns=self.currentConfig, tracker=None)

    # seed the datastore with 20 SimpleModel entities keyed '0'..'19'
    for number in range(20):
        SimpleModel(key_name='%d' % number).put()

    memcache.set('calls', 0)
    memcache.set('raise', False)

    fsmContext = self.factory.createFSMInstance(self.machineConfig.name, instanceName='foo')
    fsmContext[constants.STEPS_PARAM] = 1

    stateObj = TemporaryStateObject()
    stateObj[constants.TASK_NAME_PARAM] = 'taskName'
    stateObj[constants.RETRY_COUNT_PARAM] = 0

    self.context = None
    self.obj = None

    # fixed seed: the work index asserted below ends in a specific number
    random.seed(0)
    fsmContext.dispatch('pseudo-init', stateObj)  # write down a work package
    self.index = fsmContext[constants.INDEX_PARAM]

    expectedWorkIndex = 'foo--InitialState--ok--FanInState--step-2-2957927341'
    self.assertEqual(1, _FantasmFanIn.all().count())
    self.assertEqual(expectedWorkIndex, _FantasmFanIn.all().get().workIndex)
def setUp(self):
    """ Prepares the fan-in machine, seeds datastore and memcache, and dispatches
    'pseudo-init' so a single work package exists when the test starts. """
    super(FanInMergeJoinDispatchTest, self).setUp()
    setUpByString(self, FAN_IN_MACHINE, machineName='FanInMachine', instanceName='foo')
    mock('config.currentConfiguration', returns=self.currentConfig, tracker=None)

    # 20 SimpleModel entities with key names '0' through '19'
    for n in range(20):
        SimpleModel(key_name='%d' % n).put()

    # counters/flags kept in memcache under the keys 'calls' and 'raise'
    memcache.set('calls', 0)
    memcache.set('raise', False)

    machineContext = self.factory.createFSMInstance(self.machineConfig.name,
                                                    instanceName='foo')
    machineContext[constants.STEPS_PARAM] = 1

    dispatchObj = TemporaryStateObject()
    dispatchObj[constants.TASK_NAME_PARAM] = 'taskName'
    dispatchObj[constants.RETRY_COUNT_PARAM] = 0

    self.context = None
    self.obj = None

    random.seed(0)
    machineContext.dispatch('pseudo-init', dispatchObj)  # write down a work package
    self.index = machineContext[constants.INDEX_PARAM]

    self.assertEqual(1, _FantasmFanIn.all().count())
    writtenWorkIndex = _FantasmFanIn.all().get().workIndex
    self.assertEqual('foo--InitialState--ok--FanInState--step-2-2957927341',
                     writtenWorkIndex)
def test_DatastoreFSMContinuationFanIn_work_packages_restored_on_exception(self):
    """ Walks the machine to 'state-continuation', makes the next transition's
    action raise, and checks the state and work-package count afterwards. """
    obj = TemporaryStateObject()
    obj[TASK_NAME_PARAM] = 'taskName'
    obj[RETRY_COUNT_PARAM] = 0

    event = self.context.initialize()
    # assertEqual, not assertTrue: assertTrue(a, b) treats b as the failure
    # message and never compares the two values
    self.assertEqual(FSM.PSEUDO_INIT, self.context.currentState.name)
    self.assertFalse(self.context.currentState.isContinuation)

    event = self.context.dispatch(event, obj)
    self.assertEqual('state-initial', self.context.currentState.name)
    self.assertEqual(0, _FantasmFanIn.all().count())

    event = self.context.dispatch(event, obj)
    self.assertEqual('state-continuation', self.context.currentState.name)
    self.assertEqual(1, _FantasmFanIn.all().count())

    # override the action of the transition to raise an exception
    originalAction = self.context.currentState.getTransition(event).action
    try:
        self.context.currentState.getTransition(event).action = RaiseExceptionAction()
        self.assertRaises(Exception, self.context.dispatch, event, obj)
        self.assertEqual('state-continuation', self.context.currentState.name)
        # the work packages are restored on exception
        self.assertEqual(1, _FantasmFanIn.all().count())
    finally:
        # and restore
        self.context.currentState.getTransition(event).action = originalAction

    event = self.context.dispatch(event, obj)
    self.assertEqual('state-fan-in', self.context.currentState.name)
    self.assertEqual(1, _FantasmFanIn.all().count())

    event = self.context.dispatch(event, obj)
    self.assertEqual('state-final', self.context.currentState.name)
    self.assertEqual(1, _FantasmFanIn.all().count())
def test_mergeJoinDispatch_1234_contexts(self):
    """ Stores 1234 work packages under one workIndex and checks that
    mergeJoinDispatch returns all of them. """
    sharedWorkIndex = 'instanceName--foo--event--foo2--step-0-2654435761'
    for _ in xrange(1234):
        _FantasmFanIn(workIndex=sharedWorkIndex).put()

    # can't get them all with .count()
    self.assertEqual(1000, _FantasmFanIn.all().count())

    mergedContexts = self.context.mergeJoinDispatch('event', {RETRY_COUNT_PARAM: 0})
    self.assertEqual(1234, len(mergedContexts))
    self.assertEqual(1000, _FantasmFanIn.all().count())
def test_mergeJoinDispatch_1_context(self):
    """ Stores a single work package and checks the one context that
    mergeJoinDispatch returns for it. """
    workPackage = _FantasmFanIn(
        workIndex='instanceName--foo--event--foo2--step-0-2654435761')
    workPackage.put()
    self.assertEqual(1, _FantasmFanIn.all().count())

    mergedContexts = self.context.mergeJoinDispatch('event', {RETRY_COUNT_PARAM: 0})
    expectedContexts = [{'__ix__': 1, '__step__': 0}]
    self.assertEqual(expectedContexts, mergedContexts)

    # the work package is still stored after the merge join
    self.assertEqual(1, _FantasmFanIn.all().count())
def test_mergeJoinDispatch_1234_contexts(self):
    """ Writes 1234 work packages with the same workIndex and verifies that
    mergeJoinDispatch hands back one context per package. """
    packageCount = 1234
    workIndex = 'instanceName--foo--event--foo2--step-0-2654435761'
    for _ in xrange(packageCount):
        _FantasmFanIn(workIndex=workIndex).put()

    # can't get them all with .count()
    self.assertEqual(1000, _FantasmFanIn.all().count())

    contexts = self.context.mergeJoinDispatch('event', {RETRY_COUNT_PARAM: 0})
    self.assertEqual(1234, len(contexts))
    self.assertEqual(1000, _FantasmFanIn.all().count())
def test_fail_at_acquireWriteLock(self):
    """ Makes ReadWriteLock.acquireWriteLock raise during the first dispatch, then
    retries with retryCount=1 and checks the work package and lock value. """
    lockKey = 'foo--InitialState--ok--FanInState--step-2-lock-3255389373'
    expectedWorkIndex = 'foo--InitialState--ok--FanInState--step-2-2957927341'

    self.setUpContext()
    mock('ReadWriteLock.acquireWriteLock', raises=Exception, tracker=None)
    try:
        self.assertRaises(Exception, self.context.dispatch, 'pseudo-init', self.obj)
        self.assertEqual(0, _FantasmFanIn.all().count())
        self.assertEqual(None, memcache.get(lockKey))
    finally:
        # always undo the mock, even when one of the assertions above fails
        restore()

    self.setUpContext(retryCount=1)
    self.context.dispatch('pseudo-init', self.obj)
    self.assertEqual(1, _FantasmFanIn.all().count())
    self.assertEqual(expectedWorkIndex, _FantasmFanIn.all().get().workIndex)
    self.assertEqual('65536', memcache.get(lockKey))
def test_run_twice(self):
    """ Dispatches 'pseudo-init' on two freshly set-up contexts and checks that the
    same single work package and lock value are seen after each run. """
    lockKey = 'foo--InitialState--ok--FanInState--step-2-lock-3255389373'
    expectedWorkIndex = 'foo--InitialState--ok--FanInState--step-2-2957927341'

    # first run
    self.setUpContext()
    self.context.dispatch('pseudo-init', self.obj)
    self.assertEqual(1, _FantasmFanIn.all().count())
    self.assertEqual(expectedWorkIndex, _FantasmFanIn.all().get().workIndex)
    self.assertEqual('65536', memcache.get(lockKey))

    # second run: identical expectations
    self.setUpContext()
    self.context.dispatch('pseudo-init', self.obj)
    self.assertEqual(1, _FantasmFanIn.all().count())
    self.assertEqual(expectedWorkIndex, _FantasmFanIn.all().get().workIndex)
    self.assertEqual('65536', memcache.get(lockKey))
def test_fail_at_put(self):
    """ Makes db.put raise during the first dispatch, then retries with
    retryCount=1 and checks the work package and lock value. """
    lockKey = 'foo--InitialState--ok--FanInState--step-2-lock-3255389373'
    expectedWorkIndex = 'foo--InitialState--ok--FanInState--step-2-2957927341'

    self.setUpContext()
    mock('db.put', raises=Exception, tracker=None)
    try:
        self.assertRaises(Exception, self.context.dispatch, 'pseudo-init',
                          copy.copy(self.obj))
        self.assertEqual(0, _FantasmFanIn.all(namespace='').count())
        # notice the +1 extra on the lock
        self.assertEqual(65537, memcache.get(lockKey))
    finally:
        # always undo the mock, even when one of the assertions above fails
        restore()

    self.setUpContext(retryCount=1)
    self.context.dispatch('pseudo-init', self.obj)
    self.assertEqual(1, _FantasmFanIn.all(namespace='').count())
    self.assertEqual(expectedWorkIndex,
                     _FantasmFanIn.all(namespace='').get().workIndex)
    self.assertEqual(65537, memcache.get(lockKey))
def mergeJoinDispatch(self, event, obj):
    """ Performs a merge join on the pending fan-in dispatches.

    @param event: an event that is being merge joined (destination state must be a fan in)
    @param obj: a dict-like object; constants.RETRY_COUNT_PARAM is read from it
    @return: an FSMContextList (possibly empty) of FSMContext instances
    """
    # this assertion comes from _queueDispatchFanIn - we never want fan-out info in a fan-in context
    assert not self.get(constants.GEN_PARAM)
    assert not self.get(constants.FORK_PARAM)

    class FSMContextList(list):
        """ A list that supports .logger.info(), .logger.warning() etc. for fan-in actions """
        def __init__(self, context, contexts, guarded=False):
            """ setup a self.logger for fan-in actions """
            super(FSMContextList, self).__init__(contexts)
            self.logger = Logger(context)
            self.instanceName = context.instanceName
            self.guarded = guarded

    # the work package index is stored in the url of the Task/FSMContext
    index = self.get(constants.INDEX_PARAM)
    self.logger.debug('Index: %s', index)
    taskNameBase = self.getTaskName(event, fanIn=True)

    # see comment (***) in self._queueDispatchFanIn
    #
    # in the case of failing to acquire a read lock (due to failed release of write lock)
    # we have decided to keep retrying
    if self._getTaskRetryLimit() is None:
        raiseOnFail = False
    else:
        raiseOnFail = self._getTaskRetryLimit() > self.__obj[constants.RETRY_COUNT_PARAM]

    readWriteLock = ReadWriteLock(taskNameBase, self)
    readWriteLock.acquireReadLock(index, raiseOnFail=raiseOnFail)

    # see comment (A) in self._queueDispatchFanIn(...)
    time.sleep(constants.DATASTORE_ASYNCRONOUS_INDEX_WRITE_WAIT_TIME)

    # the following step ensures that fan-in only ever operates one time over a list of data.
    # the entity is created in State.dispatch(...) _after_ all the actions have executed
    # successfully
    khash = knuthHash(index)
    self.logger.debug('knuthHash of index: %s', khash)
    workIndex = '%s-%d' % (taskNameBase, khash)

    isRetry = obj[constants.RETRY_COUNT_PARAM] > 0
    if isRetry:
        runOnce = RunOnceSemaphore(workIndex, self)
        if runOnce.readRunOnceSemaphore(payload=self.__obj[constants.TASK_NAME_PARAM]):
            self.logger.info("Fan-in idempotency guard for workIndex '%s', not processing any work items.",
                             workIndex)
            # don't operate over the data again
            return FSMContextList(self, [], guarded=True)

    # fetch all the work packages in the current group for processing
    workPackages = (_FantasmFanIn.all(namespace='')
                    .filter('workIndex =', workIndex)
                    .order('__key__'))

    # construct a list of FSMContexts
    clones = [self.clone(replaceData=package.context) for package in workPackages]
    return FSMContextList(self, clones)
def test_batchsize_10(self):
    """ Runs the machine with batchsize 10 and no retries, then checks the stored
    work-package count and the collected result values. """
    self.context['batchsize'] = 10
    self.context.initialize()  # queues the first task
    runQueuedTasks(maxRetries=0)

    expectedPackages = 1 + self.EXTRA_COUNT
    self.assertEqual(expectedPackages, _FantasmFanIn.all().count())

    expectedValues = self.EXTRA_VALUES + self.EXPECTED_VALUES
    actualValues = sorted(ContinuationFanInResult.get_by_key_name('test').values)
    self.assertEqual(expectedValues, actualValues)
def mergeJoinDispatch(self, event, obj):
    """ Performs a merge join on the pending fan-in dispatches.

    @param event: an event that is being merge joined (destination state must be a fan in)
    @param obj: a dict-like object; constants.RETRY_COUNT_PARAM is read from it
    @return: an FSMContextList (possibly empty) of FSMContext instances
    """
    # this assertion comes from _queueDispatchFanIn - we never want fan-out info in a fan-in context
    assert not self.get(constants.GEN_PARAM)
    assert not self.get(constants.FORK_PARAM)

    class FSMContextList(list):
        """ A list that supports .logger.info(), .logger.warning() etc. for fan-in actions """
        def __init__(self, context, contexts):
            """ setup a self.logger for fan-in actions """
            super(FSMContextList, self).__init__(contexts)
            self.logger = Logger(context)
            self.instanceName = context.instanceName

    # the work package index is stored in the url of the Task/FSMContext
    index = self.get(constants.INDEX_PARAM)
    taskNameBase = self.getTaskName(event, fanIn=True)

    # see comment (***) in self._queueDispatchFanIn
    #
    # in the case of failing to acquire a read lock (due to failed release of write lock)
    # we have decided to keep retrying
    if self._getTaskRetryLimit() is None:
        raiseOnFail = False
    else:
        raiseOnFail = self._getTaskRetryLimit() > self.__obj[constants.RETRY_COUNT_PARAM]

    readWriteLock = ReadWriteLock(taskNameBase, self)
    readWriteLock.acquireReadLock(index, raiseOnFail=raiseOnFail)

    # see comment (A) in self._queueDispatchFanIn(...)
    time.sleep(constants.DATASTORE_ASYNCRONOUS_INDEX_WRITE_WAIT_TIME)

    # the following step ensures that fan-in only ever operates one time over a list of data.
    # the entity is created in State.dispatch(...) _after_ all the actions have executed
    # successfully
    workIndex = '%s-%d' % (taskNameBase, knuthHash(index))

    isRetry = obj[constants.RETRY_COUNT_PARAM] > 0
    if isRetry:
        runOnce = RunOnceSemaphore(workIndex, self)
        if runOnce.readRunOnceSemaphore(payload=self.__obj[constants.TASK_NAME_PARAM]):
            self.logger.info("Fan-in idempotency guard for workIndex '%s', not processing any work items.",
                             workIndex)
            # don't operate over the data again
            return FSMContextList(self, [])

    # fetch all the work packages in the current group for processing
    workPackages = (_FantasmFanIn.all()
                    .filter('workIndex =', workIndex)
                    .order('__key__'))

    # construct a list of FSMContexts
    clones = [self.clone(data=package.context) for package in workPackages]
    return FSMContextList(self, clones)
def test_batchsize_11(self):
    """ Runs the machine with batchsize 11 and no retries, then checks that one
    work package is stored and the expected values were collected. """
    self.context['batchsize'] = 11
    self.context.initialize()  # queues the first task
    runQueuedTasks(maxRetries=0)

    storedPackages = _FantasmFanIn.all(namespace='').count()
    self.assertEqual(1, storedPackages)

    collected = sorted(ContinuationFanInResult.get_by_key_name('test').values)
    self.assertEqual(self.EXPECTED_VALUES, collected)
def test_batchsize_3(self):
    """ Runs the machine with batchsize 3, then checks that four work packages are
    stored and the expected values were collected. """
    self.context['batchsize'] = 3
    self.context.initialize()  # queues the first task
    runQueuedTasks()

    storedPackages = _FantasmFanIn.all().count()
    self.assertEqual(4, storedPackages)

    collected = sorted(ContinuationFanInResult.get_by_key_name('test').values)
    self.assertEqual(self.EXPECTED_VALUES, collected)
def test_DatastoreFSMContinuationFanIn_smoke_test(self):
    """ Dispatches the machine from pseudo-init through to 'state-final', checking
    the state name and the stored work-package count after every step. """
    event = self.context.initialize()
    # assertEqual, not assertTrue: assertTrue(a, b) treats b as the failure
    # message and never compares the two values
    self.assertEqual(FSM.PSEUDO_INIT, self.context.currentState.name)
    self.assertFalse(self.context.currentState.isContinuation)

    # a fresh TemporaryStateObject is passed to every dispatch
    event = self.context.dispatch(event, TemporaryStateObject())
    self.assertEqual('state-initial', self.context.currentState.name)
    self.assertEqual(0, _FantasmFanIn.all().count())

    event = self.context.dispatch(event, TemporaryStateObject())
    self.assertEqual('state-continuation', self.context.currentState.name)
    self.assertEqual(1, _FantasmFanIn.all().count())

    event = self.context.dispatch(event, TemporaryStateObject())
    self.assertEqual('state-fan-in', self.context.currentState.name)
    self.assertEqual(0, _FantasmFanIn.all().count())

    event = self.context.dispatch(event, TemporaryStateObject())
    self.assertEqual('state-final', self.context.currentState.name)
    self.assertEqual(0, _FantasmFanIn.all().count())
def test_DatastoreFSMContinuationFanIn_work_packages_restored_on_exception(
        self):
    """ Walks the machine to 'state-continuation', makes the next transition's
    action raise, and checks the state and work-package count afterwards. """
    obj = TemporaryStateObject()
    obj[TASK_NAME_PARAM] = 'taskName'
    obj[RETRY_COUNT_PARAM] = 0

    event = self.context.initialize()
    # assertEqual, not assertTrue: assertTrue(a, b) treats b as the failure
    # message and never compares the two values
    self.assertEqual(FSM.PSEUDO_INIT, self.context.currentState.name)
    self.assertFalse(self.context.currentState.isContinuation)

    event = self.context.dispatch(event, obj)
    self.assertEqual('state-initial', self.context.currentState.name)
    self.assertEqual(0, _FantasmFanIn.all().count())

    event = self.context.dispatch(event, obj)
    self.assertEqual('state-continuation', self.context.currentState.name)
    self.assertEqual(1, _FantasmFanIn.all().count())

    # override the action of the transition to raise an exception
    originalAction = self.context.currentState.getTransition(event).action
    try:
        self.context.currentState.getTransition(
            event).action = RaiseExceptionAction()
        self.assertRaises(Exception, self.context.dispatch, event, obj)
        self.assertEqual('state-continuation',
                         self.context.currentState.name)
        # the work packages are restored on exception
        self.assertEqual(1, _FantasmFanIn.all().count())
    finally:
        # and restore
        self.context.currentState.getTransition(
            event).action = originalAction

    event = self.context.dispatch(event, obj)
    self.assertEqual('state-fan-in', self.context.currentState.name)
    self.assertEqual(1, _FantasmFanIn.all().count())

    event = self.context.dispatch(event, obj)
    self.assertEqual('state-final', self.context.currentState.name)
    self.assertEqual(1, _FantasmFanIn.all().count())
def test_fail_at_acquireWriteLock(self):
    """ Makes ReadWriteLock.acquireWriteLock raise during the first dispatch, then
    retries with retryCount=1 and checks the work package and lock value. """
    lockKey = 'foo--InitialState--ok--FanInState--step-2-lock-3255389373'
    expectedWorkIndex = 'foo--InitialState--ok--FanInState--step-2-2957927341'

    self.setUpContext()
    mock('ReadWriteLock.acquireWriteLock', raises=Exception, tracker=None)
    try:
        self.assertRaises(Exception, self.context.dispatch, 'pseudo-init',
                          self.obj)
        self.assertEqual(0, _FantasmFanIn.all().count())
        self.assertEqual(None, memcache.get(lockKey))
    finally:
        # always undo the mock, even when one of the assertions above fails
        restore()

    self.setUpContext(retryCount=1)
    self.context.dispatch('pseudo-init', self.obj)
    self.assertEqual(1, _FantasmFanIn.all().count())
    self.assertEqual(expectedWorkIndex, _FantasmFanIn.all().get().workIndex)
    self.assertEqual('65536', memcache.get(lockKey))
def test_run_twice(self):
    """ Dispatches 'pseudo-init' on two freshly set-up contexts; the work package
    count, work index and lock value must be the same after both runs. """
    lockKey = 'foo--InitialState--ok--FanInState--step-2-lock-3255389373'
    expectedWorkIndex = 'foo--InitialState--ok--FanInState--step-2-2957927341'

    # first pass
    self.setUpContext()
    self.context.dispatch('pseudo-init', self.obj)
    self.assertEqual(1, _FantasmFanIn.all().count())
    self.assertEqual(expectedWorkIndex, _FantasmFanIn.all().get().workIndex)
    self.assertEqual('65536', memcache.get(lockKey))

    # second pass on a new context: same expectations
    self.setUpContext()
    self.context.dispatch('pseudo-init', self.obj)
    self.assertEqual(1, _FantasmFanIn.all().count())
    self.assertEqual(expectedWorkIndex, _FantasmFanIn.all().get().workIndex)
    self.assertEqual('65536', memcache.get(lockKey))
def test_fail_at_put(self):
    """ Makes db.put raise during the first dispatch, then retries with
    retryCount=1 and checks the work package and lock value. """
    lockKey = 'foo--InitialState--ok--FanInState--step-2-lock-3255389373'
    expectedWorkIndex = 'foo--InitialState--ok--FanInState--step-2-2957927341'

    self.setUpContext()
    mock('db.put', raises=Exception, tracker=None)
    try:
        self.assertRaises(Exception, self.context.dispatch, 'pseudo-init',
                          copy.copy(self.obj))
        self.assertEqual(0, _FantasmFanIn.all().count())
        # notice the +1 extra on the lock
        self.assertEqual('65537', memcache.get(lockKey))
    finally:
        # always undo the mock, even when one of the assertions above fails
        restore()

    self.setUpContext(retryCount=1)
    self.context.dispatch('pseudo-init', self.obj)
    self.assertEqual(1, _FantasmFanIn.all().count())
    self.assertEqual(expectedWorkIndex, _FantasmFanIn.all().get().workIndex)
    self.assertEqual('65537', memcache.get(lockKey))
def test_DatastoreFSMContinuationFanIn_smoke_test(self):
    """ Dispatches the machine from pseudo-init through to 'state-final', checking
    the state name and the stored work-package count after every step. """
    event = self.context.initialize()
    # assertEqual, not assertTrue: assertTrue(a, b) treats b as the failure
    # message and never compares the two values
    self.assertEqual(FSM.PSEUDO_INIT, self.context.currentState.name)
    self.assertFalse(self.context.currentState.isContinuation)

    # the same state object is reused for every dispatch
    obj = TemporaryStateObject()
    obj[TASK_NAME_PARAM] = 'taskName'
    obj[RETRY_COUNT_PARAM] = 0

    event = self.context.dispatch(event, obj)
    self.assertEqual('state-initial', self.context.currentState.name)
    self.assertEqual(0, _FantasmFanIn.all().count())

    event = self.context.dispatch(event, obj)
    self.assertEqual('state-continuation', self.context.currentState.name)
    self.assertEqual(1, _FantasmFanIn.all().count())

    event = self.context.dispatch(event, obj)
    self.assertEqual('state-fan-in', self.context.currentState.name)
    self.assertEqual(1, _FantasmFanIn.all().count())

    event = self.context.dispatch(event, obj)
    self.assertEqual('state-final', self.context.currentState.name)
    self.assertEqual(1, _FantasmFanIn.all().count())
def mergeJoinDispatch(self, event, obj):
    """ Performs a merge join on the pending fan-in dispatches.

    Waits (up to 20 polls, 0.25s apart) for the memcache write-lock counter to drop,
    then tries to take a memcache read lock. The task that wins the read lock
    fetches the work packages for this index, stores them on obj under
    constants.FAN_IN_RESULTS_PARAM, deletes them from the datastore and returns
    their contexts. A task that does not win returns an empty list.

    @param event: an event that is being merge joined (destination state must be a fan in)
    @param obj: a dict-like object; the fetched fan-in results are stored on it
    @return: an FSMContextList (possibly empty) of FSMContext instances
    """
    # this assertion comes from _queueDispatchFanIn - we never want fan-out info in a fan-in context
    assert not self.get(constants.GEN_PARAM)
    assert not self.get(constants.FORK_PARAM)

    # the work package index is stored in the url of the Task/FSMContext
    index = self.get(constants.INDEX_PARAM)
    taskNameBase = self.getTaskName(event, fanIn=True)

    # tell writers to use another index
    memcache.incr('index-' + taskNameBase)

    lock = '%s-lock-%d' % (taskNameBase, index)
    memcache.decr(lock, 2**15) # tell writers they missed the boat

    # 20 iterations * 0.25s = 5s total wait time
    busyWaitIters = 20
    busyWaitIterSecs = 0.250

    # busy wait for writers
    for i in xrange(busyWaitIters):
        counter = memcache.get(lock)
        # counter is None --> ejected from memcache
        # int(counter) <= 2**15 --> writers have all called memcache.decr
        if counter is None or int(counter) <= 2**15:
            break
        time.sleep(busyWaitIterSecs)
        self.logger.debug("Tried to acquire lock '%s' %d times...", lock, i + 1)
    else:
        # only reached when the loop ran out without a break, so a wait that succeeds
        # on the very last poll is no longer reported as a failure
        # FIXME: is there anything else that can be done? will work packages be lost? maybe queue another task
        #        to sweep up later?
        self.logger.error("Gave up waiting for all fan-in work items.")

    # at this point we could have two tasks trying to process the same work packages. in the
    # happy path this will not likely happen because the tasks are sent off with different ETAs,
    # however in the unhappy path, it is possible for multiple tasks to be executing (retry on
    # 500 etc.). we solve this with a read lock using memcache.
    #
    # FIXME: would using a transaction on db.delete work if using ancestors? one task would win the
    #        race to delete the work based on a transaction error?
    readlock = '%s-readlock-%d' % (taskNameBase, index)
    haveReadLock = False
    try:
        # put the actual name of the winning task into the lock
        actualTaskName = self.get(constants.TASK_NAME_PARAM)
        added = memcache.add(readlock, actualTaskName, time=30) # FIXME: is 30s appropriate?
        lockValue = memcache.get(readlock)

        # and return the FSMContexts list
        class FSMContextList(list):
            """ A list that supports .logger.info(), .logger.warning() etc. for fan-in actions """
            def __init__(self, context, contexts):
                """ setup a self.logger for fan-in actions """
                super(FSMContextList, self).__init__(contexts)
                self.logger = Logger(context)
                self.instanceName = context.instanceName

        # if the lock value is not equal to the added value, it means this task lost the race.
        # an empty list is returned; the original has a raise of
        # FanInReadLockFailureRuntimeError commented out at this point.
        if not added or lockValue != actualTaskName:
            return FSMContextList(self, [])

        # flag used in finally block to decide whether or not to log an error message
        haveReadLock = True

        # fetch all the work packages in the current group for processing
        workIndex = '%s-%d' % (taskNameBase, knuthHash(index))
        query = _FantasmFanIn.all() \
                             .filter('workIndex =', workIndex) \
                             .order('__key__')

        # iterate over the query to fetch results - this is done in 'small batches'
        fanInResults = list(query)

        # construct a list of FSMContexts
        contexts = [self.clone(data=r.context) for r in fanInResults]

        # hold the fanInResult around in case we need to re-put them (on an Exception)
        obj[constants.FAN_IN_RESULTS_PARAM] = fanInResults

        # and delete the work packages - bearing in mind appengine limits
        maxDeleteSize = 250 # appengine does not like to delete > 500 models at a time, 250 is a nice safe number
        if len(fanInResults) > maxDeleteSize:
            self.logger.warning("%d contexts in the current batch. Consider decreasing fan-in.",
                                len(fanInResults))
        for start in xrange(0, len(fanInResults), maxDeleteSize):
            db.delete(fanInResults[start:start + maxDeleteSize])

        return FSMContextList(self, contexts)

    finally:
        deleted = memcache.delete(readlock)

        # FIXME: is there anything else that can be done?
        if haveReadLock and deleted == memcache.DELETE_NETWORK_FAILURE:
            self.logger.error("Unable to release the fan in read lock.")
def post(self):
    """ Deletes the _FantasmFanIn entities whose workIndex equals the posted
    'workIndex' value. """
    postedWorkIndex = self.request.POST['workIndex']
    matchingWorkPackages = _FantasmFanIn.all().filter('workIndex =', postedWorkIndex)
    db.delete(matchingWorkPackages)
def test_mergeJoinDispatch_1_context(self):
    """ Stores one work package and checks that mergeJoinDispatch returns exactly
    one context for it. """
    onlyPackage = _FantasmFanIn(workIndex='instanceName--foo--event--foo2--step-0-2654435761')
    onlyPackage.put()
    self.assertEqual(1, _FantasmFanIn.all().count())

    mergedContexts = self.context.mergeJoinDispatch('event', {RETRY_COUNT_PARAM: 0})
    self.assertEqual([{'__ix__': 1, '__step__': 0}], mergedContexts)

    # the work package is still stored after the merge join
    self.assertEqual(1, _FantasmFanIn.all().count())
def post(self):
    """ Deletes the _FantasmFanIn entities whose workIndex equals the value posted
    under constants.WORK_INDEX_PARAM. """
    postedWorkIndex = self.request.POST[constants.WORK_INDEX_PARAM]
    matchingWorkPackages = _FantasmFanIn.all().filter('workIndex =', postedWorkIndex)
    db.delete(matchingWorkPackages)
def post(self):
    """ Removes every _FantasmFanIn entity stored under the work index given in the
    request's constants.WORK_INDEX_PARAM field. """
    workIndex = self.request.POST[constants.WORK_INDEX_PARAM]
    query = _FantasmFanIn.all()
    query = query.filter('workIndex =', workIndex)
    db.delete(query)
def test_batchsize_3(self):
    """ Runs the machine with batchsize 3 and checks the number of stored work
    packages and the sorted result values. """
    self.context['batchsize'] = 3
    self.context.initialize()  # queues the first task
    runQueuedTasks()

    packageCount = _FantasmFanIn.all().count()
    self.assertEqual(4, packageCount)

    resultValues = sorted(ContinuationFanInResult.get_by_key_name('test').values)
    self.assertEqual(self.EXPECTED_VALUES, resultValues)