def process_activity_loggers():
    """Flushes the activity loggers stored in the local request environ.

    Returns immediately when nothing is stored under ACTIVITIES_ENVIRON_KEY.
    Otherwise each logger that has processors is processed against the merged
    processor map, the loggers flagged with defer_finalize are handed to one
    deferred task, and every processor is pushed to pipelines with its
    serialized data.
    """
    if ACTIVITIES_ENVIRON_KEY not in os.environ:
        return

    pending_loggers = os.environ[ACTIVITIES_ENVIRON_KEY]

    # First pass: merge the processors of all loggers so that each logger is
    # later processed against the complete set.
    all_processors = {}
    for activity_logger in pending_loggers:
        all_processors.update(getattr(activity_logger, 'processors', {}))

    # Second pass: run the processing and collect the loggers whose
    # finalization has to happen in a deferred task.
    to_finalize = []
    for activity_logger in pending_loggers:
        if activity_logger.defer_finalize:
            to_finalize.append(activity_logger)
        if hasattr(activity_logger, 'processors'):
            activity_logger.process(all_processors)

    if to_finalize:
        deferred.defer(
            _finalize_activity_loggers, to_finalize, _queue=ACTIVITY_QUEUE)

    for batch in all_processors.itervalues():
        pipelines.push(
            batch.batch_name, batch.batch_processor, data=batch.serialize())
def fan_in(self, work_item, eta_delta=PIPELINES_ETA_DEFAULT_DELTA):
    """Schedules the batch fan-in task and stores the work item under it.

    Args:
      work_item: The item to store. Its work_index is set to self.work_index
          before it is written with put_async().
      eta_delta: Offset added to the current UTC time to get the task ETA.
    """
    task_eta = datetime.datetime.utcfromtimestamp(time.time()) + eta_delta
    attempts = 0

    # Loop until the named task has been created or is found to exist already.
    # When the task name is tombstoned, move to another index and try again.
    while True:
        try:
            # The task is named after the work index to perform the fan-in.
            deferred.defer(self._fan_in, _name=self.work_index, _eta=task_eta,
                           _queue=PIPELINES_QUEUE)
            break  # The task was created.
        except taskqueue.TaskAlreadyExistsError:
            break  # Expected error: the work was fanned in to the task.
        except taskqueue.TombstonedTaskError:
            # Increment randomly to find another non-tombstoned task name.
            memcache.incr(self.index_name, delta=random.randrange(1, 100))
            self._open_batch()  # Reattempt to open a new batch.
            attempts += 1
            logging.info('Tombstoned error, retrying: {}'.format(attempts))

    # Add the work item to the Datastore once successfully fanned in.
    work_item.work_index = self.work_index
    work_item.put_async()
def post(self):
    """Starts the widget creation tasks for an existing task manager.

    Reads the 'key', 'group' and 'num_widgets' request parameters. Aborts
    with 400 when 'key' is missing or 'num_widgets' is not a parseable
    non-negative integer, and with 404 when the task manager does not exist.
    Writes a JSON object with the generated widget ids to the response.
    """
    key = self.request.get('key')
    group = self.request.get('group', tasks.DEFAULT_GROUP)
    num_widgets = self.request.get('num_widgets', '')
    if not key or not num_widgets.isdigit():
        self.abort(400)
    try:
        # isdigit() also accepts characters (e.g. superscript digits) that
        # int() cannot parse; report those as a bad request, not a crash.
        num_widgets = int(num_widgets)
    except ValueError:
        self.abort(400)

    task_manager = tasks.TaskManager(key=key, group=group)
    if not task_manager.exists:
        self.abort(404)

    # Defer the expensive tasks using the TaskManager.
    widget_ids = ['widget-%s' % i for i in range(num_widgets)]
    if len(widget_ids) < 100:
        # If we think that enqueuing the tasks will take < 60s, just do it
        # synchronously with the start request to not delay the UI.
        create_deferred_widgets(task_manager, widget_ids)
    else:
        # Since just enqueuing the tasks may take > 60s, defer a task that
        # enqueues the tasks so we can take up to 10 minutes.
        deferred.defer(create_deferred_widgets, task_manager, widget_ids,
                       _queue='deferrer')

    self.response.headers['Content-Type'] = 'application/json'
    self.response.out.write(json.dumps({'widget_ids': widget_ids}))
def process_activity_loggers():
    """Flushes the activity loggers stored in the local request environ.

    Returns immediately when nothing is stored under ACTIVITIES_ENVIRON_KEY.
    Otherwise each logger that has processors is processed against the merged
    processor map, the loggers flagged with defer_finalize are handed to one
    deferred task, and every processor is pushed to pipelines with its
    serialized data.
    """
    if ACTIVITIES_ENVIRON_KEY not in os.environ:
        return

    pending_loggers = os.environ[ACTIVITIES_ENVIRON_KEY]

    # First pass: merge the processors of all loggers so that each logger is
    # later processed against the complete set.
    all_processors = {}
    for activity_logger in pending_loggers:
        all_processors.update(getattr(activity_logger, 'processors', {}))

    # Second pass: run the processing and collect the loggers whose
    # finalization has to happen in a deferred task.
    to_finalize = []
    for activity_logger in pending_loggers:
        if activity_logger.defer_finalize:
            to_finalize.append(activity_logger)
        if hasattr(activity_logger, 'processors'):
            activity_logger.process(all_processors)

    if to_finalize:
        deferred.defer(
            _finalize_activity_loggers, to_finalize, _queue=ACTIVITY_QUEUE)

    for batch in all_processors.itervalues():
        pipelines.push(
            batch.batch_name, batch.batch_processor, data=batch.serialize())
def testDefer(self):
    # A deferred WriteFile task must run and keep the logged-in user.
    user_email = '*****@*****.**'
    self.login(user_email)

    file_path = '/foo.txt'
    file_content = 'hello'
    deferred.defer(WriteFile, file_path, file_content)
    self.RunDeferredTasks()

    # Verify the task ran.
    written_file = files.File(file_path)
    self.assertTrue(written_file.exists)
    self.assertEqual(file_content, written_file.content)

    # Verify that the logged-in user is the created_by user.
    self.assertEqual(user_email, written_file.created_by.email)
def defer_task(self, task_key, callback, *args, **kwargs):
    """Runs the given callback in a task associated to the task manager.

    Args:
      task_key: An arbitrary, but unique key for this task. This is useful for
          linking individual deferred tasks to user-visible elements.
      callback: The function to defer which will be given *args and **kwargs.
      *args: Positional arguments passed to deferred.defer after the callback.
      **kwargs: Keyword arguments passed to deferred.defer. The '_task_data'
          and '_queue' entries are always overwritten by this method.
    Raises:
      InvalidTaskManagerError: If the task manager does not exist.
      TaskManagerFinalizedError: If the task manager is already finalized.
      DuplicateTaskError: If the same task_key is used.
    """
    if not self.exists:
        raise InvalidTaskManagerError(
            'The task manager "%s" in group "%s" does not exist.'
            % (self.key, self.group))
    if self._finalized:
        raise TaskManagerFinalizedError(
            'The task manager has already been finalized, tasks can no longer '
            'be deferred using this task manager.')

    if self._task_keys is None:
        self._task_keys = set()
    if self._num_total is None:
        self._num_total = 0

    if task_key in self._task_keys:
        # Wrap the key in a 1-tuple so that a tuple task_key is formatted as
        # a single value instead of being unpacked by the % operator.
        raise DuplicateTaskError(
            'Task with key "%s" has already been added.' % (task_key,))

    self._num_total += 1
    self._task_keys.add(task_key)

    # Extra data passed to the callback wrapper.
    kwargs['_task_data'] = {
        'task_manager_key': self.key,
        'task_manager_group': self.group,
        'task_key': task_key,
        'broadcast_channel_key': self._broadcast_channel_key,
    }
    kwargs['_queue'] = self._queue

    # Broadcast status of each task when it enters the queue.
    # Do this before the call to defer, to ensure correct ordering.
    if self._broadcast_channel_key:
        _maybe_send_status_message(
            self._broadcast_channel, STATUS_QUEUED, task_key, self.key)
    deferred.defer(_callback_wrapper, callback, *args, **kwargs)
def defer_task(self, task_key, callback, *args, **kwargs):
    """Runs the given callback in a task associated to the task manager.

    Args:
      task_key: An arbitrary, but unique key for this task. This is useful for
          linking individual deferred tasks to user-visible elements.
      callback: The function to defer which will be given *args and **kwargs.
      *args: Positional arguments passed to deferred.defer after the callback.
      **kwargs: Keyword arguments passed to deferred.defer. The '_task_data'
          and '_queue' entries are always overwritten by this method.
    Raises:
      InvalidTaskManagerError: If the task manager does not exist.
      TaskManagerFinalizedError: If the task manager is already finalized.
      DuplicateTaskError: If the same task_key is used.
    """
    if not self.exists:
        raise InvalidTaskManagerError(
            'The task manager "%s" in group "%s" does not exist.'
            % (self.key, self.group))
    if self._finalized:
        raise TaskManagerFinalizedError(
            'The task manager has already been finalized, tasks can no longer '
            'be deferred using this task manager.')

    if self._task_keys is None:
        self._task_keys = set()
    if self._num_total is None:
        self._num_total = 0

    if task_key in self._task_keys:
        # Wrap the key in a 1-tuple so that a tuple task_key is formatted as
        # a single value instead of being unpacked by the % operator.
        raise DuplicateTaskError(
            'Task with key "%s" has already been added.' % (task_key,))

    self._num_total += 1
    self._task_keys.add(task_key)

    # Extra data passed to the callback wrapper.
    kwargs['_task_data'] = {
        'task_manager_key': self.key,
        'task_manager_group': self.group,
        'task_key': task_key,
        'broadcast_channel_key': self._broadcast_channel_key,
    }
    kwargs['_queue'] = self._queue

    # Broadcast status of each task when it enters the queue.
    # Do this before the call to defer, to ensure correct ordering.
    if self._broadcast_channel_key:
        _maybe_send_status_message(
            self._broadcast_channel, STATUS_QUEUED, task_key, self.key)
    deferred.defer(_callback_wrapper, callback, *args, **kwargs)
def _close_batch(self):
    """Releases the lock, advances the index and schedules a cleanup task."""
    # Cut off the ability to add to the index since it is now processing.
    # The step is random to lower collisions of task names when the cache is
    # evicted.
    index_step = random.randrange(1, 25)
    memcache.incr(self.index_name, delta=index_step)

    # The processing has started: decrement the lock by the sentinel offset.
    memcache.decr(self.lock_name, _PIPELINES_SENTINAL_OFFSET_VALUE)

    # Add a task to clean up any items that were missed from database delay.
    now = datetime.datetime.utcfromtimestamp(time.time())
    cleanup_eta = now + PIPELINES_ETA_BUFFER_CLEANUP
    try:
        deferred.defer(
            self._process,
            _name=self.work_index + '-cleanup',
            _eta=cleanup_eta,
            _queue=PIPELINES_QUEUE)
    except taskqueue.TaskAlreadyExistsError:
        pass  # Expected error to fan-in the tasks.
def _process(self, cursor=None):
    """Query and process through all pipeline work items.

    Fetches one page of WorkItems for self.work_index, defers another
    _process call for the next page when there is one, runs every fetched
    item through the unpickled processor and, once the processor finalizes,
    cleans up the items that were processed.

    Args:
      cursor: Query cursor to start the page from, or None for the first page.
    Raises:
      PipelineProcessError: If processing an item or finalizing failed. It is
          created with the list of the caught exceptions.
    """
    query = WorkItem.query()
    query = query.filter(WorkItem.work_index == self.work_index)
    results, cursor, has_more = query.fetch_page(
        PIPELINES_BATCH_SIZE, start_cursor=cursor)
    if not results:
        return

    # Use fan-out to process in batches using cursor.
    if has_more:
        deferred.defer(self._process, cursor=cursor, _queue=PIPELINES_QUEUE)

    # Process the stored items.
    processor = None
    result_keys = []
    process_errors = []
    for result in results:
        try:
            if processor is None:
                # NOTE(review): pickle.loads can execute arbitrary code; this
                # assumes WorkItem.processor is only written by trusted code.
                processor = pickle.loads(result.processor)
            processor.process(json.loads(result.data))
            result_keys.append(result.key)
        except Exception as e:
            logging.exception('Error processing data in the batch.')
            process_errors.append(e)

    # The processor is still None when unpickling failed for every item. Then
    # nothing was processed, so there is nothing to finalize or clean up, and
    # the failures are already recorded in process_errors.
    if processor is not None:
        try:
            processor.finalize()
            # Only remove when they successfully finalize.
            self._cleanup(result_keys)
        except Exception as e:
            logging.exception('Error finalizing data in the batch.')
            process_errors.append(e)

    if process_errors:
        raise PipelineProcessError(process_errors)

    logging.info('Processed {} WorkItems in the {} task'.format(
        len(result_keys), os.environ.get('HTTP_X_APPENGINE_TASKNAME', '')))
def _process(self, cursor=None):
    """Query and process through all pipeline work items.

    Fetches one page of WorkItems for self.work_index, defers another
    _process call for the next page when there is one, runs every fetched
    item through the unpickled processor and, once the processor finalizes,
    cleans up the items that were processed.

    Args:
      cursor: Query cursor to start the page from, or None for the first page.
    Raises:
      PipelineProcessError: If processing an item or finalizing failed. It is
          created with the list of the caught exceptions.
    """
    query = WorkItem.query()
    query = query.filter(WorkItem.work_index == self.work_index)
    results, cursor, has_more = query.fetch_page(
        PIPELINES_BATCH_SIZE, start_cursor=cursor)
    if not results:
        return

    # Use fan-out to process in batches using cursor.
    if has_more:
        deferred.defer(self._process, cursor=cursor, _queue=PIPELINES_QUEUE)

    # Process the stored items.
    processor = None
    result_keys = []
    process_errors = []
    for result in results:
        try:
            if processor is None:
                # NOTE(review): pickle.loads can execute arbitrary code; this
                # assumes WorkItem.processor is only written by trusted code.
                processor = pickle.loads(result.processor)
            processor.process(json.loads(result.data))
            result_keys.append(result.key)
        except Exception as e:
            logging.exception('Error processing data in the batch.')
            process_errors.append(e)

    # The processor is still None when unpickling failed for every item. Then
    # nothing was processed, so there is nothing to finalize or clean up, and
    # the failures are already recorded in process_errors.
    if processor is not None:
        try:
            processor.finalize()
            # Only remove when they successfully finalize.
            self._cleanup(result_keys)
        except Exception as e:
            logging.exception('Error finalizing data in the batch.')
            process_errors.append(e)

    if process_errors:
        raise PipelineProcessError(process_errors)

    logging.info('Processed {} WorkItems in the {} task'.format(
        len(result_keys), os.environ.get('HTTP_X_APPENGINE_TASKNAME', '')))