Esempio n. 1
0
def process_activity_loggers():
    """Processes the pending activities from the local request environ.

    Reads the activity loggers stored under ACTIVITIES_ENVIRON_KEY, merges
    the `processors` mapping of every logger that has one, and then lets each
    such logger process against the merged mapping. Loggers whose
    `defer_finalize` is truthy are handed to a single deferred
    `_finalize_activity_loggers` task, and every collected processor is
    pushed to `pipelines`.

    Does nothing when the environ key is absent.
    """
    if ACTIVITIES_ENVIRON_KEY not in os.environ:
        return

    activity_loggers = os.environ[ACTIVITIES_ENVIRON_KEY]

    # First pass: discover all the processors so that every logger sees the
    # complete mapping during the second pass.
    processors = {}
    for logger in activity_loggers:
        processors.update(getattr(logger, 'processors', {}))

    # Second pass: collect the loggers to finalize later and run processing.
    deferred_activity_loggers = []
    for logger in activity_loggers:
        if logger.defer_finalize:
            deferred_activity_loggers.append(logger)

        if hasattr(logger, 'processors'):
            logger.process(processors)

    if deferred_activity_loggers:
        deferred.defer(_finalize_activity_loggers,
                       deferred_activity_loggers,
                       _queue=ACTIVITY_QUEUE)

    # values() instead of itervalues(): works on both Python 2 and Python 3.
    for processor in processors.values():
        pipelines.push(processor.batch_name,
                       processor.batch_processor,
                       data=processor.serialize())
Esempio n. 2
0
    def fan_in(self, work_item, eta_delta=PIPELINES_ETA_DEFAULT_DELTA):
        """Schedules the named fan-in task and records the work item.

        Args:
          work_item: The item to batch. Its `work_index` is set to
              `self.work_index` and it is saved with `put_async()`.
          eta_delta: Offset added to the current UTC time to compute the
              ETA of the fan-in task.
        """
        retries = 0
        now = datetime.datetime.utcfromtimestamp(time.time())
        eta = now + eta_delta

        # Loop until the task name in use is not tombstoned.
        while True:
            try:
                # The task is named after the work index to perform the fan-in.
                deferred.defer(self._fan_in,
                               _name=self.work_index,
                               _eta=eta,
                               _queue=PIPELINES_QUEUE)
            except taskqueue.TaskAlreadyExistsError:
                # Expected: a task with this name already exists, so the work
                # item simply fans in to it.
                break
            except taskqueue.TombstonedTaskError:
                # Jump the index by a random amount to look for a task name
                # that is not tombstoned, then open a new batch and retry.
                memcache.incr(self.index_name, delta=random.randrange(1, 100))
                self._open_batch()
                retries += 1
                logging.info('Tombstoned error, retrying: {}'.format(retries))
            else:
                # The task was created.
                break

        # Store the work item under the index that was fanned in.
        work_item.work_index = self.work_index
        work_item.put_async()
Esempio n. 3
0
  def post(self):
    """Starts the creation of `num_widgets` widgets using deferred tasks.

    Request parameters:
      key: Task manager key (required).
      group: Task manager group; defaults to tasks.DEFAULT_GROUP.
      num_widgets: Number of widgets to create, as a non-negative integer.

    Aborts with 400 when `key` is missing or `num_widgets` is not a valid
    integer, and with 404 when the task manager does not exist. Otherwise
    writes a JSON object with the generated `widget_ids` to the response.
    """
    key = self.request.get('key')
    group = self.request.get('group', tasks.DEFAULT_GROUP)
    num_widgets = self.request.get('num_widgets', '')
    if not key or not num_widgets.isdigit():
      self.abort(400)
    try:
      num_widgets = int(num_widgets)
    except ValueError:
      # isdigit() accepts some Unicode digits (e.g. superscript two) that
      # int() rejects; treat those as a bad request instead of crashing.
      # NOTE(review): assumes self.abort() raises, as the check above does.
      self.abort(400)

    task_manager = tasks.TaskManager(key=key, group=group)
    if not task_manager.exists:
      self.abort(404)

    # Defer the expensive tasks using the TaskManager.
    widget_ids = ['widget-%s' % i for i in range(num_widgets)]

    if len(widget_ids) < 100:
      # If we think that enqueuing the tasks will take < 60s, just do it
      # synchronously with the start request to not delay the UI.
      create_deferred_widgets(task_manager, widget_ids)
    else:
      # Since just enqueuing the tasks may take > 60s, defer a task that
      # enqueues the tasks so we can take up to 10 minutes.
      deferred.defer(create_deferred_widgets, task_manager, widget_ids,
                     _queue='deferrer')

    self.response.headers['Content-Type'] = 'application/json'
    self.response.out.write(json.dumps({'widget_ids': widget_ids}))
Esempio n. 4
0
  def fan_in(self, work_item, eta_delta=PIPELINES_ETA_DEFAULT_DELTA):
    """Schedules the named fan-in task and records the work item.

    Args:
      work_item: The item to batch. Its `work_index` is set to
          `self.work_index` and it is saved with `put_async()`.
      eta_delta: Offset added to the current UTC time to compute the ETA of
          the fan-in task.
    """
    retries = 0
    now = datetime.datetime.utcfromtimestamp(time.time())
    eta = now + eta_delta

    # Loop until the task name in use is not tombstoned.
    while True:
      try:
        # The task is named after the work index to perform the fan-in.
        deferred.defer(
            self._fan_in,
            _name=self.work_index,
            _eta=eta,
            _queue=PIPELINES_QUEUE)
      except taskqueue.TaskAlreadyExistsError:
        # Expected: a task with this name already exists, so the work item
        # simply fans in to it.
        break
      except taskqueue.TombstonedTaskError:
        # Jump the index by a random amount to look for a task name that is
        # not tombstoned, then open a new batch and retry.
        memcache.incr(self.index_name, delta=random.randrange(1, 100))
        self._open_batch()
        retries += 1
        logging.info('Tombstoned error, retrying: {}'.format(retries))
      else:
        # The task was created.
        break

    # Store the work item under the index that was fanned in.
    work_item.work_index = self.work_index
    work_item.put_async()
Esempio n. 5
0
def process_activity_loggers():
  """Processes the pending activities from the local request environ.

  Reads the activity loggers stored under ACTIVITIES_ENVIRON_KEY, merges the
  `processors` mapping of every logger that has one, and then lets each such
  logger process against the merged mapping. Loggers whose `defer_finalize`
  is truthy are handed to a single deferred `_finalize_activity_loggers`
  task, and every collected processor is pushed to `pipelines`.

  Does nothing when the environ key is absent.
  """
  if ACTIVITIES_ENVIRON_KEY not in os.environ:
    return

  activity_loggers = os.environ[ACTIVITIES_ENVIRON_KEY]

  # First pass: discover all the processors so that every logger sees the
  # complete mapping during the second pass.
  processors = {}
  for logger in activity_loggers:
    processors.update(getattr(logger, 'processors', {}))

  # Second pass: collect the loggers to finalize later and run processing.
  deferred_activity_loggers = []
  for logger in activity_loggers:
    if logger.defer_finalize:
      deferred_activity_loggers.append(logger)

    if hasattr(logger, 'processors'):
      logger.process(processors)

  if deferred_activity_loggers:
    deferred.defer(_finalize_activity_loggers, deferred_activity_loggers,
                   _queue=ACTIVITY_QUEUE)

  # values() instead of itervalues(): works on both Python 2 and Python 3.
  for processor in processors.values():
    pipelines.push(
        processor.batch_name, processor.batch_processor,
        data=processor.serialize())
Esempio n. 6
0
    def post(self):
        """Starts the creation of `num_widgets` widgets using deferred tasks.

        Request parameters:
          key: Task manager key (required).
          group: Task manager group; defaults to tasks.DEFAULT_GROUP.
          num_widgets: Number of widgets to create, as a non-negative integer.

        Aborts with 400 when `key` is missing or `num_widgets` is not a valid
        integer, and with 404 when the task manager does not exist. Otherwise
        writes a JSON object with the generated `widget_ids` to the response.
        """
        key = self.request.get('key')
        group = self.request.get('group', tasks.DEFAULT_GROUP)
        num_widgets = self.request.get('num_widgets', '')
        if not key or not num_widgets.isdigit():
            self.abort(400)
        try:
            num_widgets = int(num_widgets)
        except ValueError:
            # isdigit() accepts some Unicode digits (e.g. superscript two)
            # that int() rejects; treat those as a bad request instead of
            # crashing.
            # NOTE(review): assumes self.abort() raises, as the check above
            # does.
            self.abort(400)

        task_manager = tasks.TaskManager(key=key, group=group)
        if not task_manager.exists:
            self.abort(404)

        # Defer the expensive tasks using the TaskManager.
        widget_ids = ['widget-%s' % i for i in range(num_widgets)]

        if len(widget_ids) < 100:
            # If we think that enqueuing the tasks will take < 60s, just do it
            # synchronously with the start request to not delay the UI.
            create_deferred_widgets(task_manager, widget_ids)
        else:
            # Since just enqueuing the tasks may take > 60s, defer a task that
            # enqueues the tasks so we can take up to 10 minutes.
            deferred.defer(create_deferred_widgets,
                           task_manager,
                           widget_ids,
                           _queue='deferrer')

        self.response.headers['Content-Type'] = 'application/json'
        self.response.out.write(json.dumps({'widget_ids': widget_ids}))
Esempio n. 7
0
  def testDefer(self):
    """A deferred WriteFile task runs and records the logged-in user."""
    user_email = '*****@*****.**'
    file_path, file_content = '/foo.txt', 'hello'

    self.login(user_email)
    deferred.defer(WriteFile, file_path, file_content)
    self.RunDeferredTasks()

    # The task ran: the file exists with the deferred content.
    written_file = files.File(file_path)
    self.assertTrue(written_file.exists)
    self.assertEqual(file_content, written_file.content)

    # The file creator is the user that was logged in when deferring.
    self.assertEqual(user_email, written_file.created_by.email)
Esempio n. 8
0
    def testDefer(self):
        """A deferred WriteFile task runs and records the logged-in user."""
        user_email = '*****@*****.**'
        file_path, file_content = '/foo.txt', 'hello'

        self.login(user_email)
        deferred.defer(WriteFile, file_path, file_content)
        self.RunDeferredTasks()

        # The task ran: the file exists with the deferred content.
        written_file = files.File(file_path)
        self.assertTrue(written_file.exists)
        self.assertEqual(file_content, written_file.content)

        # The file creator is the user that was logged in when deferring.
        self.assertEqual(user_email, written_file.created_by.email)
Esempio n. 9
0
  def defer_task(self, task_key, callback, *args, **kwargs):
    """Defers the callback as a task tracked by this task manager.

    Args:
      task_key: An arbitrary, but unique key for this task. This is useful
          for linking individual deferred tasks to user-visible elements.
      callback: The function to defer which will be given *args and **kwargs.
      *args: Positional arguments passed on to deferred.defer after callback.
      **kwargs: Keyword arguments passed on to deferred.defer. The
          '_task_data' and '_queue' entries are overwritten here.
    Raises:
      InvalidTaskManagerError: If the task manager does not exist.
      TaskManagerFinalizedError: If the task manager is already finalized.
      DuplicateTaskError: If the same task_key is used.
    """
    if not self.exists:
      message = ('The task manager "%s" in group "%s" does not exist.'
                 % (self.key, self.group))
      raise InvalidTaskManagerError(message)
    if self._finalized:
      raise TaskManagerFinalizedError(
          'The task manager has already been finalized, tasks can no longer '
          'be deferred using this task manager.')

    # Create the bookkeeping state on first use.
    if self._task_keys is None:
      self._task_keys = set()
    if self._num_total is None:
      self._num_total = 0

    if task_key in self._task_keys:
      message = 'Task with key "%s" has already been added.' % task_key
      raise DuplicateTaskError(message)

    self._num_total += 1
    self._task_keys.add(task_key)

    # Extra data passed to the callback wrapper, plus the queue to use.
    kwargs.update(
        _task_data={
            'task_manager_key': self.key,
            'task_manager_group': self.group,
            'task_key': task_key,
            'broadcast_channel_key': self._broadcast_channel_key,
        },
        _queue=self._queue)

    # Announce the queued status before deferring, to ensure correct
    # ordering of the status messages.
    if self._broadcast_channel_key:
      _maybe_send_status_message(
          self._broadcast_channel, STATUS_QUEUED, task_key, self.key)

    deferred.defer(_callback_wrapper, callback, *args, **kwargs)
Esempio n. 10
0
    def defer_task(self, task_key, callback, *args, **kwargs):
        """Defers the callback as a task tracked by this task manager.

        Args:
          task_key: An arbitrary, but unique key for this task. This is
              useful for linking individual deferred tasks to user-visible
              elements.
          callback: The function to defer which will be given *args and
              **kwargs.
          *args: Positional arguments passed on to deferred.defer after
              callback.
          **kwargs: Keyword arguments passed on to deferred.defer. The
              '_task_data' and '_queue' entries are overwritten here.
        Raises:
          InvalidTaskManagerError: If the task manager does not exist.
          TaskManagerFinalizedError: If the task manager is already finalized.
          DuplicateTaskError: If the same task_key is used.
        """
        if not self.exists:
            message = ('The task manager "%s" in group "%s" does not exist.' %
                       (self.key, self.group))
            raise InvalidTaskManagerError(message)
        if self._finalized:
            raise TaskManagerFinalizedError(
                'The task manager has already been finalized, tasks can no longer '
                'be deferred using this task manager.')

        # Create the bookkeeping state on first use.
        if self._task_keys is None:
            self._task_keys = set()
        if self._num_total is None:
            self._num_total = 0

        if task_key in self._task_keys:
            message = 'Task with key "%s" has already been added.' % task_key
            raise DuplicateTaskError(message)

        self._num_total += 1
        self._task_keys.add(task_key)

        # Extra data passed to the callback wrapper, plus the queue to use.
        kwargs.update(
            _task_data={
                'task_manager_key': self.key,
                'task_manager_group': self.group,
                'task_key': task_key,
                'broadcast_channel_key': self._broadcast_channel_key,
            },
            _queue=self._queue)

        # Announce the queued status before deferring, to ensure correct
        # ordering of the status messages.
        if self._broadcast_channel_key:
            _maybe_send_status_message(self._broadcast_channel, STATUS_QUEUED,
                                       task_key, self.key)

        deferred.defer(_callback_wrapper, callback, *args, **kwargs)
Esempio n. 11
0
  def _close_batch(self):
    """Closes the current batch and schedules its delayed cleanup task."""
    # Cut off the ability to add to the index since it is now processing.
    # The increment is random to lower collisions of task names when the
    # cache is evicted.
    memcache.incr(self.index_name, delta=random.randrange(1, 25))
    # The processing has started, stop using the index.
    memcache.decr(self.lock_name, _PIPELINES_SENTINAL_OFFSET_VALUE)

    # Schedule a task to clean up any items that were missed from database
    # delay.
    now = datetime.datetime.utcfromtimestamp(time.time())
    cleanup_eta = now + PIPELINES_ETA_BUFFER_CLEANUP
    try:
      deferred.defer(
          self._process,
          _name=self.work_index + '-cleanup',
          _eta=cleanup_eta,
          _queue=PIPELINES_QUEUE)
    except taskqueue.TaskAlreadyExistsError:
      # Expected: the named cleanup task has already been added.
      pass
Esempio n. 12
0
    def _close_batch(self):
        """Closes the current batch and schedules its delayed cleanup task."""
        # Cut off the ability to add to the index since it is now processing.
        # The increment is random to lower collisions of task names when the
        # cache is evicted.
        memcache.incr(self.index_name, delta=random.randrange(1, 25))
        # The processing has started, stop using the index.
        memcache.decr(self.lock_name, _PIPELINES_SENTINAL_OFFSET_VALUE)

        # Schedule a task to clean up any items that were missed from
        # database delay.
        now = datetime.datetime.utcfromtimestamp(time.time())
        cleanup_eta = now + PIPELINES_ETA_BUFFER_CLEANUP
        try:
            deferred.defer(
                self._process,
                _name=self.work_index + '-cleanup',
                _eta=cleanup_eta,
                _queue=PIPELINES_QUEUE)
        except taskqueue.TaskAlreadyExistsError:
            # Expected: the named cleanup task has already been added.
            pass
Esempio n. 13
0
    def _process(self, cursor=None):
        """Query and process through all pipeline work items.

        Fetches one page of WorkItems for `self.work_index`, defers another
        `_process` task for the next page when there is more, then runs the
        processor over the fetched items and finalizes it. Successfully
        processed items are cleaned up only after finalize succeeds.

        Args:
          cursor: Query cursor to start the page from, or None for the first
              page.
        Raises:
          PipelineProcessError: If processing any item or finalizing failed;
              carries the list of collected exceptions.
        """
        query = WorkItem.query()
        query = query.filter(WorkItem.work_index == self.work_index)

        results, cursor, has_more = query.fetch_page(PIPELINES_BATCH_SIZE,
                                                     start_cursor=cursor)
        if not results:
            return

        # Use fan-out to process in batches using cursor.
        if has_more:
            deferred.defer(self._process,
                           cursor=cursor,
                           _queue=PIPELINES_QUEUE)

        # Process the stored items.
        processor = None
        result_keys = []
        process_errors = []
        for result in results:
            try:
                if processor is None:
                    # The processor is loaded from the first item that can be
                    # unpickled and reused for the rest of the page.
                    # NOTE(review): pickle.loads executes code embedded in the
                    # stored value; confirm only trusted code writes it.
                    processor = pickle.loads(result.processor)
                processor.process(json.loads(result.data))
                result_keys.append(result.key)
            except Exception as e:
                logging.exception('Error processing data in the batch.')
                process_errors.append(e)

        # Without a processor nothing was processed, so there is nothing to
        # finalize or clean up; the errors collected above are still raised.
        if processor is not None:
            try:
                processor.finalize()
                # Only remove when they successfully finalize.
                self._cleanup(result_keys)
            except Exception as e:
                logging.exception('Error finalizing data in the batch.')
                process_errors.append(e)

        if process_errors:
            raise PipelineProcessError(process_errors)

        logging.info('Processed {} WorkItems in the {} task'.format(
            len(result_keys), os.environ.get('HTTP_X_APPENGINE_TASKNAME', '')))
Esempio n. 14
0
  def _process(self, cursor=None):
    """Query and process through all pipeline work items.

    Fetches one page of WorkItems for `self.work_index`, defers another
    `_process` task for the next page when there is more, then runs the
    processor over the fetched items and finalizes it. Successfully processed
    items are cleaned up only after finalize succeeds.

    Args:
      cursor: Query cursor to start the page from, or None for the first page.
    Raises:
      PipelineProcessError: If processing any item or finalizing failed;
          carries the list of collected exceptions.
    """
    query = WorkItem.query()
    query = query.filter(WorkItem.work_index == self.work_index)

    results, cursor, has_more = query.fetch_page(PIPELINES_BATCH_SIZE,
                                                 start_cursor=cursor)
    if not results:
      return

    # Use fan-out to process in batches using cursor.
    if has_more:
      deferred.defer(self._process, cursor=cursor, _queue=PIPELINES_QUEUE)

    # Process the stored items.
    processor = None
    result_keys = []
    process_errors = []
    for result in results:
      try:
        if processor is None:
          # The processor is loaded from the first item that can be unpickled
          # and reused for the rest of the page.
          # NOTE(review): pickle.loads executes code embedded in the stored
          # value; confirm only trusted code writes it.
          processor = pickle.loads(result.processor)
        processor.process(json.loads(result.data))
        result_keys.append(result.key)
      except Exception as e:
        logging.exception('Error processing data in the batch.')
        process_errors.append(e)

    # Without a processor nothing was processed, so there is nothing to
    # finalize or clean up; the errors collected above are still raised.
    if processor is not None:
      try:
        processor.finalize()
        # Only remove when they successfully finalize.
        self._cleanup(result_keys)
      except Exception as e:
        logging.exception('Error finalizing data in the batch.')
        process_errors.append(e)

    if process_errors:
      raise PipelineProcessError(process_errors)

    logging.info(
        'Processed {} WorkItems in the {} task'.format(
            len(result_keys), os.environ.get('HTTP_X_APPENGINE_TASKNAME', '')))