Example no. 1
def OpenDeduplicateConnection(dedupe_namespace):
    """Open a connection to the isolate server for Dedupe use.

  Args:
    dedupe_namespace: String id for the comparison namespace.

  Returns:
    Connection proxy, or None on failure.
  """
    try:
        with timeout_util.Timeout(DEDUPE_TIMEOUT):
            return isolateserver.get_storage_api(constants.ISOLATESERVER,
                                                 dedupe_namespace)
    except Exception:
        logging.warning('initializing isolate server connection failed',
                        exc_info=True)
        return None
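
# A minimal usage sketch for the helper above (not part of the original
# module): 'default-gzip' is a hypothetical namespace id, and contains()
# stands in for whatever query the caller makes against the proxy.  Since the
# helper returns None on failure, callers must guard before using it:
#
#   storage = OpenDeduplicateConnection('default-gzip')
#   if storage is not None:
#       missing = storage.contains(items)
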
def SymbolDeduplicatorNotify(dedupe_namespace, dedupe_queue):
    """Send a symbol file to the swarming service

  Notify the swarming service of a successful upload.  If the notification fails
  for any reason, we ignore it.  We don't care as it just means we'll upload it
  again later on, and the symbol server will handle that graciously.

  This func runs in a different process from the main one, so we cannot share
  the storage object.  Instead, we create our own.  This func stays alive for
  the life of the process, so we only create one here overall.

  Args:
    dedupe_namespace: The isolateserver namespace to dedupe uploaded symbols.
    dedupe_queue: The queue to read SymbolElements from
  """
    if dedupe_queue is None:
        return

    sym_file = ''
    try:
        with timeout_util.Timeout(DEDUPE_TIMEOUT):
            storage = isolateserver.get_storage_api(constants.ISOLATESERVER,
                                                    dedupe_namespace)
        for symbol_element in iter(dedupe_queue.get, None):
            if not symbol_element or not symbol_element.symbol_item:
                continue
            symbol_item = symbol_element.symbol_item
            push_state = symbol_element.opaque_push_state
            sym_file = symbol_item.sym_file if symbol_item.sym_file else ''
            if push_state is not None:
                with timeout_util.Timeout(DEDUPE_TIMEOUT):
                    logging.debug('sending %s to dedupe server', sym_file)
                    symbol_item.prepare(SymbolItem.ALGO)
                    storage.push(symbol_item, push_state,
                                 symbol_item.content())
                    logging.debug('sent %s', sym_file)
        logging.info('dedupe notification finished; exiting')
    except Exception:
        logging.warning('posting %s to dedupe server failed',
                        os.path.basename(sym_file),
                        exc_info=True)

        # Keep draining the queue though so it doesn't fill up.
        while dedupe_queue.get() is not None:
            continue
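
# The loop above stops on a None sentinel via iter(dedupe_queue.get, None).
# A hedged sketch of the producer side (symbol_elements is a hypothetical
# iterable of SymbolElement instances):
#
#   dedupe_queue = multiprocessing.Queue()
#   notifier = multiprocessing.Process(
#       target=SymbolDeduplicatorNotify,
#       args=(dedupe_namespace, dedupe_queue))
#   notifier.start()
#   for element in symbol_elements:
#       dedupe_queue.put(element)
#   dedupe_queue.put(None)  # Sentinel: tells the notifier to drain and exit.
#   notifier.join()
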
def SymbolDeduplicatorNotify(dedupe_namespace, dedupe_queue):
  """Send a symbol file to the swarming service

  Notify the swarming service of a successful upload.  If the notification fails
  for any reason, we ignore it.  We don't care as it just means we'll upload it
  again later on, and the symbol server will handle that graciously.

  This func runs in a different process from the main one, so we cannot share
  the storage object.  Instead, we create our own.  This func stays alive for
  the life of the process, so we only create one here overall.

  Args:
    dedupe_namespace: The isolateserver namespace to dedupe uploaded symbols.
    dedupe_queue: The queue to read SymbolElements from
  """
  if dedupe_queue is None:
    return

  sym_file = ''
  try:
    with timeout_util.Timeout(DEDUPE_TIMEOUT):
      storage = isolateserver.get_storage_api(constants.ISOLATESERVER,
                                              dedupe_namespace)
    for symbol_element in iter(dedupe_queue.get, None):
      if not symbol_element or not symbol_element.symbol_item:
        continue
      symbol_item = symbol_element.symbol_item
      push_state = symbol_element.opaque_push_state
      sym_file = symbol_item.sym_file if symbol_item.sym_file else ''
      if push_state is not None:
        with timeout_util.Timeout(DEDUPE_TIMEOUT):
          logging.debug('sending %s to dedupe server', sym_file)
          symbol_item.prepare(SymbolItem.ALGO)
          storage.push(symbol_item, push_state, symbol_item.content())
          logging.debug('sent %s', sym_file)
    logging.info('dedupe notification finished; exiting')
  except Exception:
    logging.warning('posting %s to dedupe server failed',
                    os.path.basename(sym_file), exc_info=True)

    # Keep draining the queue though so it doesn't fill up.
    while dedupe_queue.get() is not None:
      continue
def UploadSymbols(board=None, official=False, server=None, breakpad_dir=None,
                  file_limit=DEFAULT_FILE_LIMIT, sleep=DEFAULT_SLEEP_DELAY,
                  upload_limit=None, sym_paths=None, failed_list=None,
                  root=None, retry=True, dedupe_namespace=None,
                  product_name='ChromeOS'):
  """Upload all the generated symbols for |board| to the crash server

  You can use in a few ways:
    * pass |board| to locate all of its symbols
    * pass |breakpad_dir| to upload all the symbols in there
    * pass |sym_paths| to upload specific symbols (or dirs of symbols)

  Args:
    board: The board whose symbols we wish to upload
    official: Use the official symbol server rather than the staging one
    server: Explicit server to post symbols to
    breakpad_dir: The full path to the breakpad directory where symbols live
    file_limit: The max file size of a symbol file before we try to strip it
    sleep: How long to sleep in between uploads
    upload_limit: If set, only upload this many symbols (meant for testing)
    sym_paths: Specific symbol files (or dirs of sym files) to upload,
      otherwise search |breakpad_dir|
    failed_list: Write the names of all sym files we did not upload; can be a
      filename or file-like object.
    root: The tree to prefix to |breakpad_dir| (if |breakpad_dir| is not set)
    retry: Whether we should retry failures.
    dedupe_namespace: The isolateserver namespace to dedupe uploaded symbols.
    product_name: A string for stats purposes. Usually 'ChromeOS' or 'Android'.

  Returns:
    The number of errors that were encountered.
  """
  if server is None:
    if official:
      upload_url = OFFICIAL_UPLOAD_URL
    else:
      logging.warning('unofficial builds upload to the staging server')
      upload_url = STAGING_UPLOAD_URL
  else:
    upload_url = server

  if sym_paths:
    logging.info('uploading specified symbols to %s', upload_url)
  else:
    if breakpad_dir is None:
      if root is None:
        raise ValueError('breakpad_dir requires root to be set')
      breakpad_dir = os.path.join(
          root,
          cros_generate_breakpad_symbols.FindBreakpadDir(board).lstrip('/'))
    logging.info('uploading all symbols to %s from %s', upload_url,
                 breakpad_dir)
    sym_paths = [breakpad_dir]

  # We use storage_query to ask the server about existing symbols.  The
  # storage_notify_proc process is used to post updates to the server.  We
  # cannot safely share the storage object between threads/processes, but
  # we also want to minimize creating new ones as each object has to init
  # new state (like server connections).
  storage_query = None
  if dedupe_namespace:
    dedupe_limit = DEDUPE_LIMIT
    dedupe_queue = multiprocessing.Queue()
    try:
      with timeout_util.Timeout(DEDUPE_TIMEOUT):
        storage_query = isolateserver.get_storage_api(constants.ISOLATESERVER,
                                                      dedupe_namespace)
    except Exception:
      logging.warning('initializing dedupe server connection failed',
                      exc_info=True)
  else:
    dedupe_limit = 1
    dedupe_queue = None
  # Can't use parallel.BackgroundTaskRunner because that'll create multiple
  # processes, and we want only one for the whole time (see comment above).
  storage_notify_proc = multiprocessing.Process(
      target=SymbolDeduplicatorNotify, args=(dedupe_namespace, dedupe_queue))

  bg_errors = multiprocessing.Value('i')
  watermark_errors = multiprocessing.Value('f')
  failed_queue = multiprocessing.Queue()
  uploader = functools.partial(
      UploadSymbol, upload_url, product_name=product_name,
      file_limit=file_limit, sleep=sleep, num_errors=bg_errors,
      watermark_errors=watermark_errors, failed_queue=failed_queue,
      passed_queue=dedupe_queue)

  start_time = datetime.datetime.now()
  Counters = cros_build_lib.Collection(
      'Counters', upload_limit=upload_limit, uploaded_count=0, deduped_count=0)
  counters = Counters()

  def _Upload(queue, counters, files):
    if not files:
      return

    missing_count = 0
    for item in SymbolDeduplicator(storage_query, files):
      missing_count += 1

      if counters.upload_limit == 0:
        continue

      queue.put((item,))
      counters.uploaded_count += 1
      if counters.upload_limit is not None:
        counters.upload_limit -= 1

    counters.deduped_count += (len(files) - missing_count)

  try:
    storage_notify_proc.start()

    with osutils.TempDir(prefix='upload_symbols.') as tempdir:
      # For the first run, we collect the symbols that failed.  If the
      # overall failure rate was low, we'll retry them on the second run.
      for retry in (retry, False):
        # We need to limit ourselves to one upload at a time to avoid
        # triggering the server's DoS protection.  See these bugs for more
        # details:
        # http://crbug.com/209442
        # http://crbug.com/212496
        with parallel.BackgroundTaskRunner(uploader, processes=1) as queue:
          dedupe_list = []
          for sym_file in SymbolFinder(tempdir, sym_paths):
            dedupe_list.append(sym_file)
            dedupe_len = len(dedupe_list)
            if dedupe_len < dedupe_limit:
              if (counters.upload_limit is None or
                  dedupe_len < counters.upload_limit):
                continue

            # We check the counter before _Upload so that we don't keep talking
            # to the dedupe server.  Otherwise, we end up sending one symbol at
            # a time to it and that slows things down a lot.
            if counters.upload_limit == 0:
              break

            _Upload(queue, counters, dedupe_list)
            dedupe_list = []
          _Upload(queue, counters, dedupe_list)

        # See if we need to retry, and if we haven't failed too many times yet.
        if not retry or ErrorLimitHit(bg_errors, watermark_errors):
          break

        sym_paths = []
        failed_queue.put(None)
        while True:
          sym_path = failed_queue.get()
          if sym_path is None:
            break
          sym_paths.append(sym_path)

        if sym_paths:
          logging.warning('retrying %i symbols', len(sym_paths))
          if counters.upload_limit is not None:
            counters.upload_limit += len(sym_paths)
          # Decrement the error count in case we recover in the second pass.
          assert bg_errors.value >= len(sym_paths), \
                 'more failed files than errors?'
          bg_errors.value -= len(sym_paths)
        else:
          # No failed symbols, so just return now.
          break

    # If the user has requested it, save all the symbol files that we failed to
    # upload to a listing file.  This should help with recovery efforts later.
    failed_queue.put(None)
    WriteQueueToFile(failed_list, failed_queue, breakpad_dir)

  finally:
    logging.info('finished uploading; joining background process')
    if dedupe_queue:
      dedupe_queue.put(None)

    # The notification might be slow going, so give it some time to finish.
    # We have to poll here as the process monitor is watching for output and
    # will kill us if we go silent for too long.
    wait_minutes = DEDUPE_NOTIFY_TIMEOUT
    while storage_notify_proc.is_alive() and wait_minutes > 0:
      if dedupe_queue:
        qsize = str(dedupe_queue.qsize())
      else:
        qsize = '[None]'
      logging.info('waiting up to %i minutes for ~%s notifications',
                   wait_minutes, qsize)
      storage_notify_proc.join(60)
      wait_minutes -= 1

    # The process is taking too long, so kill it and complain.
    if storage_notify_proc.is_alive():
      logging.warning('notification process took too long')
      logging.PrintBuildbotStepWarnings()

      # Kill it gracefully first (traceback) before taking it down harder.
      pid = storage_notify_proc.pid
      for sig in (signal.SIGINT, signal.SIGTERM, signal.SIGKILL):
        logging.warning('sending %s to %i', signals.StrSignal(sig), pid)
        # The process might have exited between the last check and the
        # actual kill below, so ignore ESRCH errors.
        try:
          os.kill(pid, sig)
        except OSError as e:
          if e.errno == errno.ESRCH:
            break
          else:
            raise
        time.sleep(5)
        if not storage_notify_proc.is_alive():
          break

      # Drain the queue so we don't hang when we finish.
      try:
        logging.warning('draining the notify queue manually')
        with timeout_util.Timeout(60):
          try:
            while dedupe_queue.get_nowait():
              pass
          except Queue.Empty:
            pass
      except timeout_util.TimeoutError:
        logging.warning('draining the notify queue failed; trashing it')
        dedupe_queue.cancel_join_thread()

  logging.info('uploaded %i symbols (%i were deduped) which took: %s',
               counters.uploaded_count, counters.deduped_count,
               datetime.datetime.now() - start_time)

  return bg_errors.value
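
# A hedged sketch of the three calling conventions the docstring above
# describes (board name and paths are hypothetical):
#
#   errors = UploadSymbols(board='eve', root='/build')
#   errors = UploadSymbols(breakpad_dir='/build/eve/usr/lib/debug/breakpad')
#   errors = UploadSymbols(sym_paths=['/tmp/foo.sym'], upload_limit=10)
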
Example no. 5
def main():
    colorama.init()

    parser = optparse.OptionParser(description=sys.modules[__name__].__doc__)
    parser.add_option('-I',
                      '--isolate-server',
                      metavar='URL',
                      default='',
                      help='Isolate server to use')
    parser.add_option('--namespace',
                      default='temporary%d-gzip' % time.time(),
                      metavar='XX',
                      help='Namespace to use on the server, default: %default')
    parser.add_option('--threads',
                      type='int',
                      default=16,
                      metavar='N',
                      help='Parallel worker threads to use, default:%default')
    graph.unit_option(parser,
                      '--items',
                      default=0,
                      help='Number of items to upload')
    graph.unit_option(parser,
                      '--max-size',
                      default=0,
                      help='Loop until this amount of data has been '
                           'transferred')
    graph.unit_option(parser,
                      '--mid-size',
                      default=100 * 1024,
                      help='Rough average size of each item, default:%default')
    parser.add_option('--columns',
                      type='int',
                      default=graph.get_console_width(),
                      metavar='N',
                      help='For histogram display, default:%default')
    parser.add_option(
        '--buckets',
        type='int',
        default=20,
        metavar='N',
        help='Number of buckets for histogram display, default:%default')
    parser.add_option('--dump', metavar='FOO.JSON', help='Dumps to json file')
    parser.add_option('--dry-run',
                      action='store_true',
                      help='Do not send anything')
    parser.add_option('-v',
                      '--verbose',
                      action='store_true',
                      help='Enable logging')
    options, args = parser.parse_args()

    logging.basicConfig(
        level=logging.INFO if options.verbose else logging.FATAL)
    if args:
        parser.error('Unsupported args: %s' % args)
    if bool(options.max_size) == bool(options.items):
        parser.error(
            'Use one of --max-size or --items.\n'
            '  Use --max-size if you want to run it until NN bytes were '
            'transferred.\n'
            '  Otherwise use --items to run it for NN items.')
    if not options.dry_run:
        options.isolate_server = options.isolate_server.rstrip('/')
        if not options.isolate_server:
            parser.error('--isolate-server is required.')

    print(' - Using %d threads,  items=%d,  max-size=%d,  mid-size=%d' %
          (options.threads, options.items, options.max_size, options.mid_size))
    if options.dry_run:
        print(' - %sDRY RUN MODE%s' %
              (colorama.Fore.GREEN, colorama.Fore.RESET))

    start = time.time()

    random_pool = Randomness()
    print(' - Generated pool after %.1fs' % (time.time() - start))

    columns = [('index', 0), ('data', 0), ('size', options.items)]
    progress = Progress(columns)
    api = isolateserver.get_storage_api(options.isolate_server,
                                        options.namespace)
    do_item = functools.partial(
        send_and_receive, random_pool, options.dry_run,
        isolateserver.is_namespace_with_compression(options.namespace), api,
        progress)

    # TODO(maruel): Handling Ctrl-C should:
    # - Stop adding tasks.
    # - Stop scheduling tasks in ThreadPool.
    # - Wait for the remaining ongoing tasks to complete.
    # - Still print details and write the json file.
    with threading_utils.ThreadPoolWithProgress(progress, options.threads,
                                                options.threads, 0) as pool:
        if options.items:
            for _ in xrange(options.items):
                pool.add_task(0, do_item, gen_size(options.mid_size))
                progress.print_update()
        elif options.max_size:
            # This one is approximate.
            total = 0
            while True:
                size = gen_size(options.mid_size)
                progress.update_item('', size=1)
                progress.print_update()
                pool.add_task(0, do_item, size)
                total += size
                if total >= options.max_size:
                    break
        results = sorted(pool.join())

    print('')
    print(' - Took %.1fs.' % (time.time() - start))
    print('')
    print_results(results, options.columns, options.buckets)
    if options.dump:
        with open(options.dump, 'w') as f:
            json.dump(results, f, separators=(',', ':'))
    return 0
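
# The script presumably ends with the usual entry-point stanza; a sketch,
# assuming nothing beyond the main() defined above:
#
#   if __name__ == '__main__':
#       sys.exit(main())
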
def main():
  colorama.init()

  parser = optparse.OptionParser(description=sys.modules[__name__].__doc__)
  parser.add_option(
      '-I', '--isolate-server',
      metavar='URL', default='',
      help='Isolate server to use')
  parser.add_option(
      '--namespace', default='temporary%d-gzip' % time.time(), metavar='XX',
      help='Namespace to use on the server, default: %default')
  parser.add_option(
      '--threads', type='int', default=16, metavar='N',
      help='Parallel worker threads to use, default:%default')
  graph.unit_option(
      parser, '--items', default=0, help='Number of items to upload')
  graph.unit_option(
      parser, '--max-size', default=0,
      help='Loop until this amount of data has been transferred')
  graph.unit_option(
      parser, '--mid-size', default=100*1024,
      help='Rough average size of each item, default:%default')
  parser.add_option(
      '--columns', type='int', default=graph.get_console_width(), metavar='N',
      help='For histogram display, default:%default')
  parser.add_option(
      '--buckets', type='int', default=20, metavar='N',
      help='Number of buckets for histogram display, default:%default')
  parser.add_option(
      '--dump', metavar='FOO.JSON', help='Dumps to json file')
  parser.add_option(
      '--dry-run', action='store_true', help='Do not send anything')
  parser.add_option(
      '-v', '--verbose', action='store_true', help='Enable logging')
  options, args = parser.parse_args()

  logging.basicConfig(level=logging.INFO if options.verbose else logging.FATAL)
  if args:
    parser.error('Unsupported args: %s' % args)
  if bool(options.max_size) == bool(options.items):
    parser.error(
        'Use one of --max-size or --items.\n'
        '  Use --max-size if you want to run it until NN bytes were '
        'transferred.\n'
        '  Otherwise use --items to run it for NN items.')
  if not options.dry_run:
    options.isolate_server = options.isolate_server.rstrip('/')
    if not options.isolate_server:
      parser.error('--isolate-server is required.')

  print(
      ' - Using %d threads,  items=%d,  max-size=%d,  mid-size=%d' % (
          options.threads, options.items, options.max_size, options.mid_size))
  if options.dry_run:
    print(' - %sDRY RUN MODE%s' % (colorama.Fore.GREEN, colorama.Fore.RESET))

  start = time.time()

  random_pool = Randomness()
  print(' - Generated pool after %.1fs' % (time.time() - start))

  columns = [('index', 0), ('data', 0), ('size', options.items)]
  progress = Progress(columns)
  api = isolateserver.get_storage_api(options.isolate_server, options.namespace)
  do_item = functools.partial(
      send_and_receive,
      random_pool,
      options.dry_run,
      isolateserver.is_namespace_with_compression(options.namespace),
      api,
      progress)

  # TODO(maruel): Handling Ctrl-C should:
  # - Stop adding tasks.
  # - Stop scheduling tasks in ThreadPool.
  # - Wait for the remaining ongoing tasks to complete.
  # - Still print details and write the json file.
  with threading_utils.ThreadPoolWithProgress(
      progress, options.threads, options.threads, 0) as pool:
    if options.items:
      for _ in xrange(options.items):
        pool.add_task(0, do_item, gen_size(options.mid_size))
        progress.print_update()
    elif options.max_size:
      # This one is approximate.
      total = 0
      while True:
        size = gen_size(options.mid_size)
        progress.update_item('', size=1)
        progress.print_update()
        pool.add_task(0, do_item, size)
        total += size
        if total >= options.max_size:
          break
    results = sorted(pool.join())

  print('')
  print(' - Took %.1fs.' % (time.time() - start))
  print('')
  print_results(results, options.columns, options.buckets)
  if options.dump:
    with open(options.dump, 'w') as f:
      json.dump(results, f, separators=(',', ':'))
  return 0
def UploadSymbols(board=None,
                  official=False,
                  server=None,
                  breakpad_dir=None,
                  file_limit=DEFAULT_FILE_LIMIT,
                  sleep=DEFAULT_SLEEP_DELAY,
                  upload_limit=None,
                  sym_paths=None,
                  failed_list=None,
                  root=None,
                  retry=True,
                  dedupe_namespace=None,
                  product_name='ChromeOS'):
    """Upload all the generated symbols for |board| to the crash server

  You can use in a few ways:
    * pass |board| to locate all of its symbols
    * pass |breakpad_dir| to upload all the symbols in there
    * pass |sym_paths| to upload specific symbols (or dirs of symbols)

  Args:
    board: The board whose symbols we wish to upload
    official: Use the official symbol server rather than the staging one
    server: Explicit server to post symbols to
    breakpad_dir: The full path to the breakpad directory where symbols live
    file_limit: The max file size of a symbol file before we try to strip it
    sleep: How long to sleep in between uploads
    upload_limit: If set, only upload this many symbols (meant for testing)
    sym_paths: Specific symbol files (or dirs of sym files) to upload,
      otherwise search |breakpad_dir|
    failed_list: Write the names of all sym files we did not upload; can be a
      filename or file-like object.
    root: The tree to prefix to |breakpad_dir| (if |breakpad_dir| is not set)
    retry: Whether we should retry failures.
    dedupe_namespace: The isolateserver namespace to dedupe uploaded symbols.
    product_name: A string for stats purposes. Usually 'ChromeOS' or 'Android'.

  Returns:
    The number of errors that were encountered.
  """
    if server is None:
        if official:
            upload_url = OFFICIAL_UPLOAD_URL
        else:
            logging.warning('unofficial builds upload to the staging server')
            upload_url = STAGING_UPLOAD_URL
    else:
        upload_url = server

    if sym_paths:
        logging.info('uploading specified symbols to %s', upload_url)
    else:
        if breakpad_dir is None:
            if root is None:
                raise ValueError('breakpad_dir requires root to be set')
            breakpad_dir = os.path.join(
                root,
                cros_generate_breakpad_symbols.FindBreakpadDir(board).lstrip(
                    '/'))
        logging.info('uploading all symbols to %s from %s', upload_url,
                     breakpad_dir)
        sym_paths = [breakpad_dir]

    # We use storage_query to ask the server about existing symbols.  The
    # storage_notify_proc process is used to post updates to the server.  We
    # cannot safely share the storage object between threads/processes, but
    # we also want to minimize creating new ones as each object has to init
    # new state (like server connections).
    storage_query = None
    if dedupe_namespace:
        dedupe_limit = DEDUPE_LIMIT
        dedupe_queue = multiprocessing.Queue()
        try:
            with timeout_util.Timeout(DEDUPE_TIMEOUT):
                storage_query = isolateserver.get_storage_api(
                    constants.ISOLATESERVER, dedupe_namespace)
        except Exception:
            logging.warning('initializing dedupe server connection failed',
                            exc_info=True)
    else:
        dedupe_limit = 1
        dedupe_queue = None
    # Can't use parallel.BackgroundTaskRunner because that'll create multiple
    # processes, and we want only one for the whole time (see comment above).
    storage_notify_proc = multiprocessing.Process(
        target=SymbolDeduplicatorNotify, args=(dedupe_namespace, dedupe_queue))

    bg_errors = multiprocessing.Value('i')
    watermark_errors = multiprocessing.Value('f')
    failed_queue = multiprocessing.Queue()
    uploader = functools.partial(UploadSymbol,
                                 upload_url,
                                 product_name=product_name,
                                 file_limit=file_limit,
                                 sleep=sleep,
                                 num_errors=bg_errors,
                                 watermark_errors=watermark_errors,
                                 failed_queue=failed_queue,
                                 passed_queue=dedupe_queue)

    start_time = datetime.datetime.now()
    Counters = cros_build_lib.Collection('Counters',
                                         upload_limit=upload_limit,
                                         uploaded_count=0,
                                         deduped_count=0)
    counters = Counters()

    def _Upload(queue, counters, files):
        if not files:
            return

        missing_count = 0
        for item in SymbolDeduplicator(storage_query, files):
            missing_count += 1

            if counters.upload_limit == 0:
                continue

            queue.put((item,))
            counters.uploaded_count += 1
            if counters.upload_limit is not None:
                counters.upload_limit -= 1

        counters.deduped_count += (len(files) - missing_count)

    try:
        storage_notify_proc.start()

        with osutils.TempDir(prefix='upload_symbols.') as tempdir:
            # For the first run, we collect the symbols that failed.  If
            # the overall failure rate was low, we'll retry them on the
            # second run.
            for retry in (retry, False):
                # We need to limit ourselves to one upload at a time to
                # avoid triggering the server's DoS protection.  See these
                # bugs for more details:
                # http://crbug.com/209442
                # http://crbug.com/212496
                with parallel.BackgroundTaskRunner(uploader,
                                                   processes=1) as queue:
                    dedupe_list = []
                    for sym_file in SymbolFinder(tempdir, sym_paths):
                        dedupe_list.append(sym_file)
                        dedupe_len = len(dedupe_list)
                        if dedupe_len < dedupe_limit:
                            if (counters.upload_limit is None
                                    or dedupe_len < counters.upload_limit):
                                continue

                        # We check the counter before _Upload so that we
                        # don't keep talking to the dedupe server.  Otherwise,
                        # we end up sending one symbol at a time to it and
                        # that slows things down a lot.
                        if counters.upload_limit == 0:
                            break

                        _Upload(queue, counters, dedupe_list)
                        dedupe_list = []
                    _Upload(queue, counters, dedupe_list)

                # See if we need to retry, and if we haven't failed too many
                # times yet.
                if not retry or ErrorLimitHit(bg_errors, watermark_errors):
                    break

                sym_paths = []
                failed_queue.put(None)
                while True:
                    sym_path = failed_queue.get()
                    if sym_path is None:
                        break
                    sym_paths.append(sym_path)

                if sym_paths:
                    logging.warning('retrying %i symbols', len(sym_paths))
                    if counters.upload_limit is not None:
                        counters.upload_limit += len(sym_paths)
                    # Decrement the error count in case we recover in the
                    # second pass.
                    assert bg_errors.value >= len(sym_paths), \
                           'more failed files than errors?'
                    bg_errors.value -= len(sym_paths)
                else:
                    # No failed symbols, so just return now.
                    break

        # If the user has requested it, save all the symbol files that we
        # failed to upload to a listing file.  This should help with
        # recovery efforts later.
        failed_queue.put(None)
        WriteQueueToFile(failed_list, failed_queue, breakpad_dir)

    finally:
        logging.info('finished uploading; joining background process')
        if dedupe_queue:
            dedupe_queue.put(None)

        # The notification might be slow going, so give it some time to
        # finish.  We have to poll here as the process monitor is watching
        # for output and will kill us if we go silent for too long.
        wait_minutes = DEDUPE_NOTIFY_TIMEOUT
        while storage_notify_proc.is_alive() and wait_minutes > 0:
            if dedupe_queue:
                qsize = str(dedupe_queue.qsize())
            else:
                qsize = '[None]'
            logging.info('waiting up to %i minutes for ~%s notifications',
                         wait_minutes, qsize)
            storage_notify_proc.join(60)
            wait_minutes -= 1

        # The process is taking too long, so kill it and complain.
        if storage_notify_proc.is_alive():
            logging.warning('notification process took too long')
            logging.PrintBuildbotStepWarnings()

            # Kill it gracefully first (traceback) before taking it down
            # harder.
            pid = storage_notify_proc.pid
            for sig in (signal.SIGINT, signal.SIGTERM, signal.SIGKILL):
                logging.warning('sending %s to %i', signals.StrSignal(sig),
                                pid)
                # The process might have exited between the last check and the
                # actual kill below, so ignore ESRCH errors.
                try:
                    os.kill(pid, sig)
                except OSError as e:
                    if e.errno == errno.ESRCH:
                        break
                    else:
                        raise
                time.sleep(5)
                if not storage_notify_proc.is_alive():
                    break

            # Drain the queue so we don't hang when we finish.
            try:
                logging.warning('draining the notify queue manually')
                with timeout_util.Timeout(60):
                    try:
                        while dedupe_queue.get_nowait():
                            pass
                    except Queue.Empty:
                        pass
            except timeout_util.TimeoutError:
                logging.warning(
                    'draining the notify queue failed; trashing it')
                dedupe_queue.cancel_join_thread()

    logging.info('uploaded %i symbols (%i were deduped) which took: %s',
                 counters.uploaded_count, counters.deduped_count,
                 datetime.datetime.now() - start_time)

    return bg_errors.value
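
# For reference, a standalone sketch of the escalating-kill pattern used in
# the finally block above (proc stands for any multiprocessing.Process):
#
#   for sig in (signal.SIGINT, signal.SIGTERM, signal.SIGKILL):
#       try:
#           os.kill(proc.pid, sig)
#       except OSError as e:
#           if e.errno == errno.ESRCH:  # Already exited; nothing to kill.
#               break
#           raise
#       time.sleep(5)
#       if not proc.is_alive():
#           break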