Example #1
    def write(self, digest, content):
        assert content is not None
        with self._lock:
            self._protected = self._protected or digest
        path = self._path(digest)
        # A stale broken file may remain. It is possible for the file to have
        # its write access bit removed, which would cause the file_write() call
        # to fail to open it in write mode. Take no chances here.
        file_path.try_remove(path)
        try:
            size = file_write(path, content)
        except:
            # There are two possible places where an exception can occur:
            #   1) Inside the |content| generator, in case of network or
            #      unzipping errors.
            #   2) Inside file_write itself, in case of disk IO errors.
            # In either case, delete the incomplete file and propagate the
            # exception to the caller; it will be logged there.
            file_path.try_remove(path)
            raise
        # Make the file read-only in the cache. This has a few side-effects
        # since the file node is modified, so every directory entry to this
        # file becomes read-only. It's fine here because it is a new file.
        file_path.set_read_only(path, True)
        with self._lock:
            self._add(digest, size)
        return digest
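
The |content| argument is consumed by file_write(), so it can be a streaming generator, as the except comment implies. A minimal usage sketch, assuming a hypothetical `cache` instance of the class above and hex SHA-1 content addressing (both assumptions are illustrative, not taken from the source):

    import hashlib

    def chunks(data, size=64 * 1024):
        # Yield the payload in pieces, mimicking a streaming |content| generator.
        for i in range(0, len(data), size):
            yield data[i:i + size]

    data = b'some file body'
    # Hypothetical: `cache` is an instance of the cache class above.
    cache.write(hashlib.sha1(data).hexdigest(), chunks(data))
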
Example #2
    def _load(self, trim, time_fn):
        """Loads state of the cache from json file.

    If cache_dir does not exist on disk, it is created.
    """
        self._lock.assert_locked()

        if not fs.isfile(self.state_file):
            if not fs.isdir(self.cache_dir):
                fs.makedirs(self.cache_dir)
        else:
            # Load state of the cache.
            try:
                self._lru = lru.LRUDict.load(self.state_file)
            except ValueError as err:
                logging.error('Failed to load cache state: %s', err)
                # Don't want to keep broken state file.
                file_path.try_remove(self.state_file)
        if time_fn:
            self._lru.time_fn = time_fn
        if trim:
            self._trim()
        # We want the initial cache size after trimming, i.e. what is readily
        # available.
        self._initial_number_items = len(self._lru)
        self._initial_size = sum(self._lru.itervalues())
        if self._evicted:
            logging.info('Trimming evicted items with the following sizes: %s',
                         sorted(self._evicted))
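
The except branch above deliberately discards a corrupt state file rather than keeping it around. A distilled, standalone sketch of the same pattern, using plain json/os instead of the fs/file_path wrappers from the source:

    import json
    import logging
    import os

    def load_state(state_file):
        # Return the parsed state, or None after discarding a corrupt file.
        try:
            with open(state_file, 'rb') as f:
                return json.load(f)
        except ValueError as err:
            logging.error('Failed to load cache state: %s', err)
            # Don't want to keep a broken state file.
            try:
                os.remove(state_file)
            except OSError:
                pass
            return None
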
Example #3
    def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
        """Deletes the cache file from the file system."""
        self._lock.assert_locked()
        try:
            if size == UNKNOWN_FILE_SIZE:
                try:
                    size = fs.stat(self._path(digest)).st_size
                except OSError:
                    size = 0
            file_path.try_remove(self._path(digest))
            self._evicted.append(size)
            self._free_disk += size
        except OSError as e:
            if e.errno != errno.ENOENT:
                logging.error('Error attempting to delete a file %s:\n%s' %
                              (digest, e))
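
When the caller doesn't know the file size, the method falls back to stat() and treats a vanished file as zero bytes, so the self._evicted and self._free_disk accounting stays consistent. The same fallback, distilled with the standard library:

    import os

    def file_size_or_zero(path):
        # Size used for eviction accounting; a missing file counts as 0 bytes.
        try:
            return os.stat(path).st_size
        except OSError:
            return 0
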
Example #4
    def cleanup(self):
        """Cleans up the cache directory.

    Ensures there is no unknown files in cache_dir.
    Ensures the read-only bits are set correctly.

    At that point, the cache was already loaded, trimmed to respect cache
    policies.
    """
        with self._lock:
            fs.chmod(self.cache_dir, 0700)
            # Ensure that all files listed in the state still exist, and
            # delete unknown ones.
            previous = set(self._lru)
            # It'd be faster if there were a readdir() function.
            for filename in fs.listdir(self.cache_dir):
                if filename == self.STATE_FILE:
                    fs.chmod(os.path.join(self.cache_dir, filename), 0600)
                    continue
                if filename in previous:
                    fs.chmod(os.path.join(self.cache_dir, filename), 0400)
                    previous.remove(filename)
                    continue

                # An untracked file. Delete it.
                logging.warning('Removing unknown file %s from cache',
                                filename)
                p = self._path(filename)
                if fs.isdir(p):
                    try:
                        file_path.rmtree(p)
                    except OSError:
                        pass
                else:
                    file_path.try_remove(p)
                continue

            if previous:
                # Filter out entries that were not found.
                logging.warning('Removed %d lost files', len(previous))
                for filename in previous:
                    self._lru.pop(filename)
                self._save()
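
cleanup() reconciles two views of the cache: the LRU state and the actual directory contents. Files on disk but not in the state are deleted; entries in the state but missing on disk are popped. A standalone sketch of that set arithmetic (the function name and signature are illustrative):

    import os

    def reconcile(tracked, cache_dir, state_file='state.json'):
        # Return (unknown_on_disk, lost_from_disk) relative to the tracked set.
        on_disk = set(os.listdir(cache_dir)) - {state_file}
        unknown = on_disk - tracked  # present on disk, not in the LRU state
        lost = tracked - on_disk     # in the LRU state, missing on disk
        return unknown, lost
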
Example #5
    def _load(self, trim, time_fn):
        """Loads state of the cache from json file.

    If cache_dir does not exist on disk, it is created.
    """
        self._lock.assert_locked()

        if not fs.isfile(self.state_file):
            if not fs.isdir(self.cache_dir):
                fs.makedirs(self.cache_dir)
        else:
            # Load state of the cache.
            try:
                self._lru = lru.LRUDict.load(self.state_file)
            except ValueError as err:
                logging.error('Failed to load cache state: %s', err)
                # Don't want to keep broken state file.
                file_path.try_remove(self.state_file)
        if time_fn:
            self._lru.time_fn = time_fn
        if trim:
            self._trim()
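
The time_fn hook exists so tests can drive LRU timestamps deterministically instead of using the wall clock. A sketch of injecting a fake clock (the FakeClock helper is illustrative, not from the source):

    class FakeClock(object):
        # Monotonically increasing fake time source for deterministic tests.
        def __init__(self, now=1000):
            self._now = now

        def __call__(self):
            self._now += 1
            return self._now

    # Hypothetical: a test would pass it through, e.g.
    # cache._load(trim=True, time_fn=FakeClock())
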
Example #6
def run_command(remote, task_details, work_dir, cost_usd_hour,
                task_start, run_isolated_flags, bot_file, ctx_file):
  """Runs a command and sends packets to the server to stream results back.

  Implements both I/O and hard timeouts. Sends the packets numbered, so the
  server can ensure they are processed in order.

  Returns:
    Metadata dict with the execution result.

  Raises:
    ExitSignal if a signal was caught while starting or stopping.
    InternalError on unexpected internal errors.
  """
  # Signal the command is about to be started. It is important to post a task
  # update *BEFORE* starting any user code to signal to the server that the bot
  # correctly started processing the task. In the case of a non-idempotent
  # task, this signal is used to know whether it is safe to retry the task. See
  # _reap_task() in task_scheduler.py for more information.
  start = monotonic_time()
  params = {
    'cost_usd': cost_usd_hour * (start - task_start) / 60. / 60.,
  }
  if not remote.post_task_update(
      task_details.task_id, task_details.bot_id, params):
    # Don't even bother, the task was already canceled.
    return {
      u'exit_code': -1,
      u'hard_timeout': False,
      u'io_timeout': False,
      u'must_signal_internal_failure': None,
      u'version': OUT_VERSION,
    }

  isolated_result = os.path.join(work_dir, 'isolated_result.json')
  args = get_isolated_args(work_dir, task_details,
                           isolated_result, bot_file, run_isolated_flags)
  # Hard timeout enforcement is deferred to run_isolated. Grace is doubled to
  # give one 'grace_period' slot to the child process and one slot to upload
  # the results back.
  task_details.hard_timeout = 0
  if task_details.grace_period:
    task_details.grace_period *= 2

  try:
    proc = _start_task_runner(args, work_dir, ctx_file)
  except _FailureOnStart as e:
    return fail_without_command(
        remote, task_details.bot_id, task_details.task_id, params,
        cost_usd_hour, task_start, e.exit_code, e.stdout)

  buf = _OutputBuffer(task_details, start)
  try:
    # Monitor the task
    exit_code = None
    had_io_timeout = False
    must_signal_internal_failure = None
    kill_sent = False
    timed_out = None
    try:
      for channel, new_data in proc.yield_any(
          maxsize=buf.maxsize, timeout=lambda: buf.calc_yield_wait(timed_out)):
        buf.add(channel, new_data)

        # Post update if necessary.
        if buf.should_post_update():
          params['cost_usd'] = (
              cost_usd_hour * (monotonic_time() - task_start) / 60. / 60.)
          if not remote.post_task_update(
              task_details.task_id, task_details.bot_id, params, buf.pop()):
            # Server is telling us to stop. Normally task cancellation.
            if not kill_sent:
              logging.warning('Server induced stop; sending SIGTERM')
            proc.terminate()
            timed_out = monotonic_time()

        # Send signal on timeout if necessary. Both are failures, not
        # internal_failures.
        # Eventually kill but return 0 so bot_main.py doesn't cancel the task.
        if not timed_out:
          if (task_details.io_timeout and
              buf.since_last_io > task_details.io_timeout):
            had_io_timeout = True
            logging.warning(
                'I/O timeout is %.3fs; no update for %.3fs sending SIGTERM',
                task_details.io_timeout, buf.since_last_io)
            proc.terminate()
            timed_out = monotonic_time()
        else:
          # During grace period.
          if (not kill_sent and
              buf.last_loop - timed_out >= task_details.grace_period):
            # Now kill for real. The user can distinguish between the following
            # states:
            # - signaled, but the process exited within the grace period:
            #   (hard_|io_)_timed_out will be set but the process exit code
            #   will be the one the script provided.
            # - the process exited late; its exit code will be -9 on posix.
            logging.warning(
                'Grace of %.3fs exhausted at %.3fs; sending SIGKILL',
                task_details.grace_period, buf.last_loop - timed_out)
            proc.kill()
            kill_sent = True
      logging.info('Waiting for process exit')
      exit_code = proc.wait()
    except (
        ExitSignal, InternalError, IOError,
        OSError, remote_client.InternalError) as e:
      # Something wrong happened, try to kill the child process.
      must_signal_internal_failure = str(e.message or 'unknown error')
      exit_code = kill_and_wait(proc, task_details.grace_period, e.message)

    # This is the very last packet for this command. If it was an isolated
    # task, include the output reference to the archived .isolated file.
    now = monotonic_time()
    params['cost_usd'] = cost_usd_hour * (now - task_start) / 60. / 60.
    params['duration'] = now - start
    params['io_timeout'] = had_io_timeout
    had_hard_timeout = False
    try:
      if not os.path.isfile(isolated_result):
        # This can happen if:
        # - run_isolated.py did not start,
        # - run_isolated.py started but the arguments were invalid,
        # - the host was unable to fork,
        # - a grandchild process outlived the child process and deleted
        #   everything it could.
        # Do not create an internal error, just send back the (partial) view as
        # task_runner saw it; for example, the real exit_code is unknown.
        logging.warning('there\'s no result file')
        if exit_code is None:
          exit_code = -1
      else:
        # See run_isolated.py for the format.
        with open(isolated_result, 'rb') as f:
          run_isolated_result = json.load(f)
        logging.debug('run_isolated:\n%s', run_isolated_result)
        # TODO(maruel): Grab statistics (cache hit rate, data downloaded,
        # mapping time, etc) from run_isolated and push them to the server.
        if run_isolated_result['outputs_ref']:
          params['outputs_ref'] = run_isolated_result['outputs_ref']
        had_hard_timeout = run_isolated_result['had_hard_timeout']
        if not had_io_timeout and not had_hard_timeout:
          if run_isolated_result['internal_failure']:
            must_signal_internal_failure = (
                run_isolated_result['internal_failure'])
            logging.error('%s', must_signal_internal_failure)
          elif exit_code:
            # TODO(maruel): Grab stdout from run_isolated.
            must_signal_internal_failure = (
                'run_isolated internal failure %d' % exit_code)
            logging.error('%s', must_signal_internal_failure)
        exit_code = run_isolated_result['exit_code']
        params['bot_overhead'] = 0.
        if run_isolated_result.get('duration') is not None:
          # Calculate the real task duration as measured by run_isolated and
          # calculate the remaining overhead.
          params['bot_overhead'] = params['duration']
          params['duration'] = run_isolated_result['duration']
          params['bot_overhead'] -= params['duration']
          params['bot_overhead'] -= run_isolated_result.get(
              'download', {}).get('duration', 0)
          params['bot_overhead'] -= run_isolated_result.get(
              'upload', {}).get('duration', 0)
          params['bot_overhead'] -= run_isolated_result.get(
              'cipd', {}).get('duration', 0)
          if params['bot_overhead'] < 0:
            params['bot_overhead'] = 0
        isolated_stats = run_isolated_result.get('stats', {}).get('isolated')
        if isolated_stats:
          params['isolated_stats'] = isolated_stats
        cipd_stats = run_isolated_result.get('stats', {}).get('cipd')
        if cipd_stats:
          params['cipd_stats'] = cipd_stats
        cipd_pins = run_isolated_result.get('cipd_pins')
        if cipd_pins:
          params['cipd_pins'] = cipd_pins
    except (IOError, OSError, ValueError) as e:
      logging.error('Swallowing error: %s', e)
      if not must_signal_internal_failure:
        must_signal_internal_failure = '%s\n%s' % (
            e, traceback.format_exc()[-2048:])

    # If no exit code has been set, something went wrong with run_isolated.py.
    # Set exit code to -1 to indicate a generic error occurred.
    if exit_code is None:
      exit_code = -1
    params['hard_timeout'] = had_hard_timeout

    # Ignore server reply to stop. Also ignore internal errors here if we are
    # already handling some.
    try:
      if must_signal_internal_failure:
        # We need to update the task and then send task error. However, we
        # should *not* send the exit_code since doing so would cause the task
        # to be marked as COMPLETED until the subsequent post_task_error call
        # finished, which would cause any query made between these two calls to
        # get the wrong task status. We also clear out the duration and various
        # stats as the server prints errors if either are set in this case.
        # TODO(sethkoehler): Come up with some way to still send the exit_code
        # (and thus also duration/stats) without marking the task COMPLETED.
        exit_code = None
        params.pop('duration', None)
        params.pop('bot_overhead', None)
        params.pop('isolated_stats', None)
        params.pop('cipd_stats', None)
        params.pop('cipd_pins', None)
      remote.post_task_update(
          task_details.task_id, task_details.bot_id, params, buf.pop(),
          exit_code)
      if must_signal_internal_failure:
        remote.post_task_error(task_details.task_id, task_details.bot_id,
            must_signal_internal_failure)
        # Clear out this error as we've posted it now (we already cleared out
        # exit_code above). Note: another error could arise after this point,
        # which is fine, since bot_main.py will post it.
        must_signal_internal_failure = ''
    except remote_client.InternalError as e:
      logging.error('Internal error while finishing the task: %s', e)
      if not must_signal_internal_failure:
        must_signal_internal_failure = str(e.message or 'unknown error')

    return {
      u'exit_code': exit_code,
      u'hard_timeout': had_hard_timeout,
      u'io_timeout': had_io_timeout,
      u'must_signal_internal_failure': must_signal_internal_failure,
      u'version': OUT_VERSION,
    }
  finally:
    file_path.try_remove(unicode(isolated_result))
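
The termination protocol used by the monitor loop is: SIGTERM on timeout, then SIGKILL once the (doubled) grace period is exhausted. A distilled sketch of the same escalation, using a plain subprocess.Popen-style object instead of the subprocess42 wrapper from the source:

    import time

    def kill_with_grace(proc, grace_period):
        # Ask politely first, then kill once the grace period is exhausted.
        proc.terminate()  # SIGTERM
        deadline = time.time() + grace_period
        while proc.poll() is None and time.time() < deadline:
            time.sleep(0.1)
        if proc.poll() is None:
            proc.kill()  # SIGKILL; the exit code will be -9 on posix
        return proc.wait()
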
Example #7
def run_command(remote, task_details, work_dir, cost_usd_hour, task_start,
                min_free_space, bot_file):
    """Runs a command and sends packets to the server to stream results back.

  Implements both I/O and hard timeouts. Sends the packets numbered, so the
  server can ensure they are processed in order.

  Returns:
    Metadata dict with the execution result.

  Raises:
    ExitSignal if caught some signal when starting or stopping.
    InternalError on unexpected internal errors.
  """
    # TODO(maruel): This function is incomprehensible, split and refactor.

    # Signal the command is about to be started.
    last_packet = start = now = monotonic_time()
    task_id = task_details.task_id
    bot_id = task_details.bot_id
    params = {
        'cost_usd': cost_usd_hour * (now - task_start) / 60. / 60.,
    }
    if not remote.post_task_update(task_id, bot_id, params):
        # Don't even bother, the task was already canceled.
        return {
            u'exit_code': -1,
            u'hard_timeout': False,
            u'io_timeout': False,
            u'must_signal_internal_failure': None,
            u'version': OUT_VERSION,
        }

    isolated_result = os.path.join(work_dir, 'isolated_result.json')
    args_path = os.path.join(work_dir, 'run_isolated_args.json')
    cmd = get_run_isolated()
    cmd.extend(['-a', args_path])
    args = get_isolated_args(work_dir, task_details, isolated_result, bot_file,
                             min_free_space)
    # Hard timeout enforcement is deferred to run_isolated. Grace is doubled to
    # give one 'grace_period' slot to the child process and one slot to upload
    # the results back.
    task_details.hard_timeout = 0
    if task_details.grace_period:
        task_details.grace_period *= 2

    try:
        # TODO(maruel): Support both channels independently and display stderr in
        # red.
        env = os.environ.copy()
        for key, value in (task_details.env or {}).iteritems():
            if not value:
                env.pop(key, None)
            else:
                env[key] = value
        logging.info('cmd=%s', cmd)
        logging.info('cwd=%s', work_dir)
        logging.info('env=%s', env)
        fail_on_start = lambda exit_code, stdout: fail_without_command(
            remote, bot_id, task_id, params, cost_usd_hour, task_start,
            exit_code, stdout)

        # We write args to a file since there may be more of them than the OS
        # can handle.
        try:
            with open(args_path, 'w') as f:
                json.dump(args, f)
        except (IOError, OSError) as e:
            return fail_on_start(
                -1, 'Could not write args to %s: %s' % (args_path, e))

        # Start the command
        try:
            assert cmd and all(isinstance(a, basestring) for a in cmd)
            proc = subprocess42.Popen(cmd,
                                      env=env,
                                      cwd=work_dir,
                                      detached=True,
                                      stdout=subprocess42.PIPE,
                                      stderr=subprocess42.STDOUT,
                                      stdin=subprocess42.PIPE)
        except OSError as e:
            return fail_on_start(
                1, 'Command "%s" failed to start.\nError: %s' %
                (' '.join(cmd), e))

        # Monitor the task
        output_chunk_start = 0
        stdout = ''
        exit_code = None
        had_io_timeout = False
        must_signal_internal_failure = None
        kill_sent = False
        timed_out = None
        try:
            calc = lambda: calc_yield_wait(task_details, start, last_io,
                                           timed_out, stdout)
            maxsize = lambda: MAX_CHUNK_SIZE - len(stdout)
            last_io = monotonic_time()
            for _, new_data in proc.yield_any(maxsize=maxsize, timeout=calc):
                now = monotonic_time()
                if new_data:
                    stdout += new_data
                    last_io = now

                # Post update if necessary.
                if should_post_update(stdout, now, last_packet):
                    last_packet = monotonic_time()
                    params['cost_usd'] = (cost_usd_hour *
                                          (last_packet - task_start) / 60. /
                                          60.)
                    if not remote.post_task_update(
                            task_id, bot_id, params,
                        (stdout, output_chunk_start)):
                        # Server is telling us to stop. Normally task cancellation.
                        if not kill_sent:
                            logging.warning(
                                'Server induced stop; sending SIGKILL')
                            proc.kill()
                            kill_sent = True

                    output_chunk_start += len(stdout)
                    stdout = ''

                # Send signal on timeout if necessary. Both are failures, not
                # internal_failures.
                # Eventually kill but return 0 so bot_main.py doesn't cancel the task.
                if not timed_out:
                    if (task_details.io_timeout
                            and now - last_io > task_details.io_timeout):
                        had_io_timeout = True
                        logging.warning(
                            'I/O timeout is %.3fs; no update for %.3fs sending SIGTERM',
                            task_details.io_timeout, now - last_io)
                        proc.terminate()
                        timed_out = monotonic_time()
                else:
                    # During grace period.
                    if not kill_sent and now - timed_out >= task_details.grace_period:
                        # Now kill for real. The user can distinguish between
                        # the following states:
                        # - signaled, but the process exited within the grace
                        #   period: (hard_|io_)_timed_out will be set but the
                        #   process exit code will be the one the script
                        #   provided.
                        # - the process exited late; its exit code will be -9
                        #   on posix.
                        logging.warning(
                            'Grace of %.3fs exhausted at %.3fs; sending SIGKILL',
                            task_details.grace_period, now - timed_out)
                        proc.kill()
                        kill_sent = True
            logging.info('Waiting for process exit')
            exit_code = proc.wait()
        except (ExitSignal, InternalError, IOError, OSError) as e:
            # Something wrong happened, try to kill the child process.
            must_signal_internal_failure = str(e.message or 'unknown error')
            exit_code = kill_and_wait(proc, task_details.grace_period,
                                      e.message)

        # This is the very last packet for this command. If it was an isolated
        # task, include the output reference to the archived .isolated file.
        now = monotonic_time()
        params['cost_usd'] = cost_usd_hour * (now - task_start) / 60. / 60.
        params['duration'] = now - start
        params['io_timeout'] = had_io_timeout
        had_hard_timeout = False
        try:
            if not os.path.isfile(isolated_result):
                # This can happen if:
                # - run_isolated.py did not start,
                # - run_isolated.py started but the arguments were invalid,
                # - the host was unable to fork,
                # - a grandchild process outlived the child process and
                #   deleted everything it could.
                # Do not create an internal error, just send back the (partial)
                # view as task_runner saw it; for example, the real exit_code
                # is unknown.
                logging.warning('there\'s no result file')
                if exit_code is None:
                    exit_code = -1
            else:
                # See run_isolated.py for the format.
                with open(isolated_result, 'rb') as f:
                    run_isolated_result = json.load(f)
                logging.debug('run_isolated:\n%s', run_isolated_result)
                # TODO(maruel): Grab statistics (cache hit rate, data downloaded,
                # mapping time, etc) from run_isolated and push them to the server.
                if run_isolated_result['outputs_ref']:
                    params['outputs_ref'] = run_isolated_result['outputs_ref']
                had_hard_timeout = run_isolated_result['had_hard_timeout']
                if not had_io_timeout and not had_hard_timeout:
                    if run_isolated_result['internal_failure']:
                        must_signal_internal_failure = (
                            run_isolated_result['internal_failure'])
                        logging.error('%s', must_signal_internal_failure)
                    elif exit_code:
                        # TODO(maruel): Grab stdout from run_isolated.
                        must_signal_internal_failure = (
                            'run_isolated internal failure %d' % exit_code)
                        logging.error('%s', must_signal_internal_failure)
                exit_code = run_isolated_result['exit_code']
                params['bot_overhead'] = 0.
                if run_isolated_result.get('duration') is not None:
                    # Calculate the real task duration as measured by run_isolated and
                    # calculate the remaining overhead.
                    params['bot_overhead'] = params['duration']
                    params['duration'] = run_isolated_result['duration']
                    params['bot_overhead'] -= params['duration']
                    params['bot_overhead'] -= run_isolated_result.get(
                        'download', {}).get('duration', 0)
                    params['bot_overhead'] -= run_isolated_result.get(
                        'upload', {}).get('duration', 0)
                    params['bot_overhead'] -= run_isolated_result.get(
                        'cipd', {}).get('duration', 0)
                    if params['bot_overhead'] < 0:
                        params['bot_overhead'] = 0
                isolated_stats = run_isolated_result.get('stats',
                                                         {}).get('isolated')
                if isolated_stats:
                    params['isolated_stats'] = isolated_stats
                cipd_stats = run_isolated_result.get('stats', {}).get('cipd')
                if cipd_stats:
                    params['cipd_stats'] = cipd_stats
                cipd_pins = run_isolated_result.get('cipd_pins')
                if cipd_pins:
                    params['cipd_pins'] = cipd_pins
        except (IOError, OSError, ValueError) as e:
            logging.error('Swallowing error: %s', e)
            if not must_signal_internal_failure:
                must_signal_internal_failure = '%s\n%s' % (
                    e, traceback.format_exc()[-2048:])

        # TODO(maruel): Send the internal failure here instead of sending it
        # through bot_main; this causes a race condition.
        if exit_code is None:
            exit_code = -1
        params['hard_timeout'] = had_hard_timeout

        # Ignore server reply to stop. Also ignore internal errors here if we are
        # already handling some.
        try:
            remote.post_task_update(task_id, bot_id, params,
                                    (stdout, output_chunk_start), exit_code)
        except InternalError as e:
            logging.error('Internal error while finishing the task: %s', e)
            if not must_signal_internal_failure:
                must_signal_internal_failure = str(e.message
                                                   or 'unknown error')

        return {
            u'exit_code': exit_code,
            u'hard_timeout': had_hard_timeout,
            u'io_timeout': had_io_timeout,
            u'must_signal_internal_failure': must_signal_internal_failure,
            u'version': OUT_VERSION,
        }
    finally:
        file_path.try_remove(unicode(isolated_result))
    def cleanup(self):
        """Cleans up the cache directory.

    Ensures there is no unknown files in cache_dir.
    Ensures the read-only bits are set correctly.

    At that point, the cache was already loaded, trimmed to respect cache
    policies.
    """
        with self._lock:
            fs.chmod(self.cache_dir, 0o700)
            # Ensure that all files listed in the state still exist, and
            # delete unknown ones.
            previous = set(self._lru)
            # It'd be faster if there were a readdir() function.
            for filename in fs.listdir(self.cache_dir):
                if filename == self.STATE_FILE:
                    fs.chmod(os.path.join(self.cache_dir, filename), 0o600)
                    continue
                if filename in previous:
                    fs.chmod(os.path.join(self.cache_dir, filename), 0o400)
                    previous.remove(filename)
                    continue

                # An untracked file. Delete it.
                logging.warning('Removing unknown file %s from cache',
                                filename)
                p = self._path(filename)
                if fs.isdir(p):
                    try:
                        file_path.rmtree(p)
                    except OSError:
                        pass
                else:
                    file_path.try_remove(p)
                continue

            if previous:
                # Filter out entries that were not found.
                logging.warning('Removed %d lost files', len(previous))
                for filename in previous:
                    self._lru.pop(filename)
                self._save()

        # Verify the hash of every single item to detect corruption. Corrupted
        # files will be evicted.
        with self._lock:
            for digest, (_, timestamp) in list(self._lru._items.items()):
                # Verify only if the mtime is greater than the timestamp in
                # state.json, to avoid taking too long.
                if self._get_mtime(digest) <= timestamp:
                    continue
                logging.warning('Item has been modified. item: %s', digest)
                if self._is_valid_hash(digest):
                    # Update timestamp in state.json
                    self._lru.touch(digest)
                    continue
                # remove corrupted file from LRU and file system
                self._lru.pop(digest)
                self._delete_file(digest, UNKNOWN_FILE_SIZE)
                logging.error('Deleted corrupted item: %s', digest)
            self._save()
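
The corruption check relies on _is_valid_hash(), i.e. re-hashing the file content and comparing it to the digest the file is stored under. A minimal sketch of such a check, assuming hex SHA-1 content addressing (the actual algorithm in the source depends on the namespace):

    import hashlib

    def is_valid_hash(path, digest):
        # Re-hash the file in chunks and compare against its content address.
        h = hashlib.sha1()
        with open(path, 'rb') as f:
            for chunk in iter(lambda: f.read(64 * 1024), b''):
                h.update(chunk)
        return h.hexdigest() == digest
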