def testPluralityCheckableIteratorReadsAheadAsNeeded(self):
    """Tests that the PCI does not unnecessarily read new elements."""

    class IterTest(object):

      def __init__(self):
        self.position = 0

      def __iter__(self):
        return self

      def next(self):
        if self.position == 3:
          raise StopIteration()
        self.position += 1

    # IsEmpty and PeekException should retrieve only 1 element from the
    # underlying iterator.
    pcit = PluralityCheckableIterator(IterTest())
    pcit.IsEmpty()
    pcit.PeekException()
    self.assertEquals(pcit.orig_iterator.position, 1)
    # HasPlurality requires populating 2 elements into the iterator.
    pcit.HasPlurality()
    self.assertEquals(pcit.orig_iterator.position, 2)
    # next should yield already-populated elements without advancing the
    # iterator.
    pcit.next()  # Yields element 1
    self.assertEquals(pcit.orig_iterator.position, 2)
    pcit.next()  # Yields element 2
    self.assertEquals(pcit.orig_iterator.position, 2)
    pcit.next()  # Yields element 3
    self.assertEquals(pcit.orig_iterator.position, 3)
    try:
      pcit.next()  # Underlying iterator is empty
      self.fail('Expected StopIteration')
    except StopIteration:
      pass
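
The test above pins down the read-ahead contract: IsEmpty and PeekException buffer at most one element, HasPlurality at most two, and next drains already-buffered elements before touching the underlying iterator. Below is a minimal illustrative sketch consistent with those assertions (the class name and all attributes other than orig_iterator are invented here; the real gsutil class also buffers exceptions raised by the underlying iterator, which is what PeekException reports):

class ReadAheadIteratorSketch(object):
  """Illustrative stand-in for PluralityCheckableIterator; not gsutil's code."""

  def __init__(self, it):
    self.orig_iterator = iter(it)
    self.buffered = []  # Elements read ahead but not yet handed out.
    self.exhausted = False

  def _FillTo(self, num_elements):
    # Read ahead lazily; stop as soon as the buffer is long enough.
    while len(self.buffered) < num_elements and not self.exhausted:
      try:
        self.buffered.append(self.orig_iterator.next())
      except StopIteration:
        self.exhausted = True

  def IsEmpty(self):
    self._FillTo(1)
    return not self.buffered

  def PeekException(self):
    # The real class surfaces a buffered exception here; the sketch only
    # performs the same single-element read-ahead the test asserts on.
    self._FillTo(1)

  def HasPlurality(self):
    self._FillTo(2)
    return len(self.buffered) > 1

  def __iter__(self):
    return self

  def next(self):
    self._FillTo(1)
    if self.buffered:
      return self.buffered.pop(0)
    raise StopIteration()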
Example #3
class _DiffIterator(object):
    """Iterator yielding sequence of _DiffToApply objects."""

    def __init__(self, command_obj, base_src_url, base_dst_url):
        self.command_obj = command_obj
        self.compute_file_checksums = command_obj.compute_file_checksums
        self.delete_extras = command_obj.delete_extras
        self.recursion_requested = command_obj.recursion_requested
        self.logger = self.command_obj.logger
        self.base_src_url = base_src_url
        self.base_dst_url = base_dst_url
        self.logger.info("Building synchronization state...")

        (src_fh, self.sorted_list_src_file_name) = tempfile.mkstemp(prefix="gsutil-rsync-src-")
        _tmp_files.append(self.sorted_list_src_file_name)
        (dst_fh, self.sorted_list_dst_file_name) = tempfile.mkstemp(prefix="gsutil-rsync-dst-")
        _tmp_files.append(self.sorted_list_dst_file_name)
        # Close the file handles; the file will be opened in write mode by
        # _ListUrlRootFunc.
        os.close(src_fh)
        os.close(dst_fh)

        # Build sorted lists of src and dst URLs in parallel. To do this, pass args
        # to _ListUrlRootFunc as tuple (base_url_str, out_filename, desc)
        # where base_url_str is the starting URL string for listing.
        args_iter = iter(
            [
                (self.base_src_url.url_string, self.sorted_list_src_file_name, "source"),
                (self.base_dst_url.url_string, self.sorted_list_dst_file_name, "destination"),
            ]
        )

        # Contains error message from non-retryable listing failure.
        command_obj.non_retryable_listing_failures = 0
        shared_attrs = ["non_retryable_listing_failures"]
        command_obj.Apply(
            _ListUrlRootFunc,
            args_iter,
            _RootListingExceptionHandler,
            shared_attrs,
            arg_checker=DummyArgChecker,
            parallel_operations_override=True,
            fail_on_error=True,
        )

        if command_obj.non_retryable_listing_failures:
            raise CommandException("Caught non-retryable exception - aborting rsync")

        self.sorted_list_src_file = open(self.sorted_list_src_file_name, "r")
        self.sorted_list_dst_file = open(self.sorted_list_dst_file_name, "r")

        # Wrap iterators in PluralityCheckableIterator so we can check emptiness.
        self.sorted_src_urls_it = PluralityCheckableIterator(iter(self.sorted_list_src_file))
        self.sorted_dst_urls_it = PluralityCheckableIterator(iter(self.sorted_list_dst_file))

    def _ParseTmpFileLine(self, line):
        """Parses output from _BuildTmpOutputLine.

        Parses into tuple:
          (URL, size, crc32c, md5)
        where crc32c and/or md5 can be _NA.

        Args:
          line: The line to parse.

        Returns:
          Parsed tuple: (url, size, crc32c, md5)
        """
        (encoded_url, size, crc32c, md5) = line.split()
        return (_DecodeUrl(encoded_url), int(size), crc32c, md5.strip())

    def _WarnIfMissingCloudHash(self, url_str, crc32c, md5):
        """Warns if given url_str is a cloud URL and is missing both crc32c and md5.

        Args:
          url_str: Destination URL string.
          crc32c: Destination CRC32c.
          md5: Destination MD5.

        Returns:
          True if issued warning.
        """
        # One known way this can currently happen is when rsync'ing objects larger
        # than 5 GB from S3 (for which the etag is not an MD5).
        if StorageUrlFromString(url_str).IsCloudUrl() and crc32c == _NA and md5 == _NA:
            self.logger.warn("Found no hashes to validate %s. Integrity cannot be assured without " "hashes.", url_str)
            return True
        return False

    def _ObjectsMatch(self, src_url_str, src_size, src_crc32c, src_md5, dst_url_str, dst_size, dst_crc32c, dst_md5):
        """Returns True if src and dst objects are the same.

        Uses size plus whatever checksums are available.

        Args:
          src_url_str: Source URL string.
          src_size: Source size.
          src_crc32c: Source CRC32c.
          src_md5: Source MD5.
          dst_url_str: Destination URL string.
          dst_size: Destination size.
          dst_crc32c: Destination CRC32c.
          dst_md5: Destination MD5.

        Returns:
          True/False.
        """
        # Note: This function is called from __iter__, which is called from the
        # Command.Apply driver. Thus, all checksum computation will be run in a
        # single thread, which is good (having multiple threads concurrently
        # computing checksums would thrash the disk).
        if src_size != dst_size:
            return False
        if self.compute_file_checksums:
            (src_crc32c, src_md5, dst_crc32c, dst_md5) = _ComputeNeededFileChecksums(
                self.logger, src_url_str, src_size, src_crc32c, src_md5, dst_url_str, dst_size, dst_crc32c, dst_md5
            )
        if src_md5 != _NA and dst_md5 != _NA:
            self.logger.debug("Comparing md5 for %s and %s", src_url_str, dst_url_str)
            return src_md5 == dst_md5
        if src_crc32c != _NA and dst_crc32c != _NA:
            self.logger.debug("Comparing crc32c for %s and %s", src_url_str, dst_url_str)
            return src_crc32c == dst_crc32c
        if not self._WarnIfMissingCloudHash(src_url_str, src_crc32c, src_md5):
            self._WarnIfMissingCloudHash(dst_url_str, dst_crc32c, dst_md5)
        # Without checksums to compare we depend only on basic size comparison.
        return True

    def __iter__(self):
        """Iterates over src/dst URLs and produces a _DiffToApply sequence.

        Yields:
          The _DiffToApply.
        """
        # Strip trailing slashes, if any, so we compute tail length against
        # consistent position regardless of whether trailing slashes were included
        # or not in URL.
        base_src_url_len = len(self.base_src_url.url_string.rstrip("/\\"))
        base_dst_url_len = len(self.base_dst_url.url_string.rstrip("/\\"))
        src_url_str = dst_url_str = None
        # Invariant: After each yield, the URLs in src_url_str, dst_url_str,
        # self.sorted_src_urls_it, and self.sorted_dst_urls_it are not yet
        # processed. Each time we encounter None in src_url_str or dst_url_str we
        # populate from the respective iterator, and we reset one or the other value
        # to None after yielding an action that disposes of that URL.
        while not self.sorted_src_urls_it.IsEmpty() or src_url_str is not None:
            if src_url_str is None:
                (src_url_str, src_size, src_crc32c, src_md5) = self._ParseTmpFileLine(self.sorted_src_urls_it.next())
                # Skip past base URL and normalize slashes so we can compare across
                # clouds/file systems (including Windows).
                src_url_str_to_check = _EncodeUrl(src_url_str[base_src_url_len:].replace("\\", "/"))
                dst_url_str_would_copy_to = copy_helper.ConstructDstUrl(
                    self.base_src_url,
                    StorageUrlFromString(src_url_str),
                    True,
                    True,
                    self.base_dst_url,
                    False,
                    self.recursion_requested,
                ).url_string
            if self.sorted_dst_urls_it.IsEmpty():
                # We've reached end of dst URLs, so copy src to dst.
                yield _DiffToApply(src_url_str, dst_url_str_would_copy_to, _DiffAction.COPY)
                src_url_str = None
                continue
            if not dst_url_str:
                (dst_url_str, dst_size, dst_crc32c, dst_md5) = self._ParseTmpFileLine(self.sorted_dst_urls_it.next())
                # Skip past base URL and normalize slashes so we can compare across
                # clouds/file systems (including Windows).
                dst_url_str_to_check = _EncodeUrl(dst_url_str[base_dst_url_len:].replace("\\", "/"))

            if src_url_str_to_check < dst_url_str_to_check:
                # There's no dst object corresponding to src object, so copy src to dst.
                yield _DiffToApply(src_url_str, dst_url_str_would_copy_to, _DiffAction.COPY)
                src_url_str = None
            elif src_url_str_to_check > dst_url_str_to_check:
                # dst object without a corresponding src object, so remove dst if -d
                # option was specified.
                if self.delete_extras:
                    yield _DiffToApply(None, dst_url_str, _DiffAction.REMOVE)
                dst_url_str = None
            else:
                # There is a dst object corresponding to src object, so check if objects
                # match.
                if self._ObjectsMatch(
                    src_url_str, src_size, src_crc32c, src_md5, dst_url_str, dst_size, dst_crc32c, dst_md5
                ):
                    # Continue iterating without yielding a _DiffToApply.
                    pass
                else:
                    yield _DiffToApply(src_url_str, dst_url_str, _DiffAction.COPY)
                src_url_str = None
                dst_url_str = None

        # If -d option specified any files/objects left in dst iteration should be
        # removed.
        if not self.delete_extras:
            return
        if dst_url_str:
            yield _DiffToApply(None, dst_url_str, _DiffAction.REMOVE)
            dst_url_str = None
        for line in self.sorted_dst_urls_it:
            (dst_url_str, _, _, _) = self._ParseTmpFileLine(line)
            yield _DiffToApply(None, dst_url_str, _DiffAction.REMOVE)
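
Stripped of URL parsing, checksum logic, and gsutil types, the __iter__ method above is a merge-join over two sorted streams. The following condensed sketch (the function name and action tuples are invented for illustration) shows just that control flow; note how each slot is refilled only after the action that consumed it, mirroring the invariant comment above:

def DiffSortedKeys(src_keys, dst_keys, delete_extras=True):
  """Yields ('COPY', key) or ('REMOVE', key) for two sorted key streams."""
  src_it, dst_it = iter(src_keys), iter(dst_keys)
  src = dst = None
  while True:
    if src is None:
      src = next(src_it, None)  # Refill the src slot once consumed.
    if dst is None:
      dst = next(dst_it, None)  # Refill the dst slot once consumed.
    if src is None:
      break
    if dst is None or src < dst:
      yield ('COPY', src)       # src has no dst counterpart.
      src = None
    elif src > dst:
      if delete_extras:         # Extra dst object; remove only under -d.
        yield ('REMOVE', dst)
      dst = None
    else:
      src = dst = None          # Same key; content comparison elided here.
  if delete_extras:
    while dst is not None:      # Drain any extras left on the dst side.
      yield ('REMOVE', dst)
      dst = next(dst_it, None)

# For example, list(DiffSortedKeys(['a', 'b'], ['b', 'c'])) yields
# [('COPY', 'a'), ('REMOVE', 'c')].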
Example #4
class _DiffIterator(object):
  """Iterator yielding sequence of _DiffToApply objects."""

  def __init__(self, command_obj, base_src_url, base_dst_url):
    self.command_obj = command_obj
    self.compute_checksums = command_obj.compute_checksums
    self.delete_extras = command_obj.delete_extras
    self.recursion_requested = command_obj.recursion_requested
    self.logger = self.command_obj.logger
    self.base_src_url = base_src_url
    self.base_dst_url = base_dst_url
    self.logger.info('Building synchronization state...')

    (src_fh, self.sorted_list_src_file_name) = tempfile.mkstemp(
        prefix='gsutil-rsync-src-')
    (dst_fh, self.sorted_list_dst_file_name) = tempfile.mkstemp(
        prefix='gsutil-rsync-dst-')
    # Close the file handles; the file will be opened in write mode by
    # _ListUrlRootFunc.
    os.close(src_fh)
    os.close(dst_fh)

    # Build sorted lists of src and dst URLs in parallel. To do this, pass args
    # to _ListUrlRootFunc as tuple (url_str, out_file_name, desc).
    args_iter = iter([
        (self.base_src_url.GetUrlString(), self.sorted_list_src_file_name,
         'source'),
        (self.base_dst_url.GetUrlString(), self.sorted_list_dst_file_name,
         'destination')
    ])
    command_obj.Apply(_ListUrlRootFunc, args_iter, _RootListingExceptionHandler,
                      arg_checker=DummyArgChecker,
                      parallel_operations_override=True,
                      fail_on_error=True)

    self.sorted_list_src_file = open(self.sorted_list_src_file_name, 'rb')
    self.sorted_list_dst_file = open(self.sorted_list_dst_file_name, 'rb')

    # Wrap iterators in PluralityCheckableIterator so we can check emptiness.
    self.sorted_src_urls_it = PluralityCheckableIterator(
        iter(self.sorted_list_src_file))
    self.sorted_dst_urls_it = PluralityCheckableIterator(
        iter(self.sorted_list_dst_file))

  # pylint: disable=bare-except
  def CleanUpTempFiles(self):
    """Cleans up temp files.

    This function allows the main (RunCommand) function to clean up at end of
    operation. This is necessary because tempfile.NamedTemporaryFile doesn't
    allow the created file to be re-opened in read mode on Windows, so we have
    to use tempfile.mkstemp, which doesn't automatically delete temp files (see
    https://mail.python.org/pipermail/python-list/2005-December/336958.html).
    """
    try:
      self.sorted_list_src_file.close()
      self.sorted_list_dst_file.close()
      for fname in (self.sorted_list_src_file_name,
                    self.sorted_list_dst_file_name):
        os.unlink(fname)
    except:
      pass

  def _ParseTmpFileLine(self, line):
    """Parses output from _BuildTmpOutputLine.

    Parses into tuple:
      (URL, size, crc32c, md5)
    where crc32c and/or md5 can be _NA.

    Args:
      line: The line to parse.

    Returns:
      Parsed tuple: (url, size, crc32c, md5)
    """
    (encoded_url, size, crc32c, md5) = line.split()
    return (urllib.unquote_plus(encoded_url).decode(UTF8),
            int(size), crc32c, md5.strip())

  def _WarnIfMissingCloudHash(self, url_str, crc32c, md5):
    """Warns if given url_str is a cloud URL and is missing both crc32c and md5.

    Args:
      url_str: Destination URL string.
      crc32c: Destination CRC32c.
      md5: Destination MD5.

    Returns:
      True if issued warning.
    """
    # One known way this can currently happen is when rsync'ing objects larger
    # than 5GB from S3 (for which the etag is not an MD5).
    if (StorageUrlFromString(url_str).IsCloudUrl()
        and crc32c == _NA and md5 == _NA):
      self.logger.warn(
          'Found no hashes to validate %s. '
          'Integrity cannot be assured without hashes.', url_str)
      return True
    return False

  def _ObjectsMatch(self, src_url_str, src_size, src_crc32c, src_md5,
                    dst_url_str, dst_size, dst_crc32c, dst_md5):
    """Returns True if src and dst objects are the same.

    Uses size plus whatever checksums are available.

    Args:
      src_url_str: Source URL string.
      src_size: Source size
      src_crc32c: Source CRC32c.
      src_md5: Source MD5.
      dst_url_str: Destination URL string.
      dst_size: Destination size
      dst_crc32c: Destination CRC32c.
      dst_md5: Destination MD5.

    Returns:
      True/False.
    """
    # Note: This function is called from __iter__, which is called from the
    # Command.Apply driver. Thus, all checksum computation will be run in a
    # single thread, which is good (having multiple threads concurrently
    # computing checksums would thrash the disk).
    if src_size != dst_size:
      return False
    if self.compute_checksums:
      (src_crc32c, src_md5, dst_crc32c, dst_md5) = _ComputeNeededFileChecksums(
          self.logger, src_url_str, src_size, src_crc32c, src_md5, dst_url_str,
          dst_size, dst_crc32c, dst_md5)
    if src_md5 != _NA and dst_md5 != _NA:
      self.logger.debug('Comparing md5 for %s and %s', src_url_str, dst_url_str)
      return src_md5 == dst_md5
    if src_crc32c != _NA and dst_crc32c != _NA:
      self.logger.debug(
          'Comparing crc32c for %s and %s', src_url_str, dst_url_str)
      return src_crc32c == dst_crc32c
    if not self._WarnIfMissingCloudHash(src_url_str, src_crc32c, src_md5):
      self._WarnIfMissingCloudHash(dst_url_str, dst_crc32c, dst_md5)
    # Without checksums to compare we depend only on basic size comparison.
    return True

  def __iter__(self):
    """Iterates over src/dst URLs and produces a _DiffToApply sequence.

    Yields:
      The _DiffToApply.
    """
    # Strip trailing slashes, if any, so we compute tail length against
    # consistent position regardless of whether trailing slashes were included
    # or not in URL.
    base_src_url_len = len(self.base_src_url.GetUrlString().rstrip('/\\'))
    base_dst_url_len = len(self.base_dst_url.GetUrlString().rstrip('/\\'))
    src_url_str = dst_url_str = None
    # Invariant: After each yield, the URLs in src_url_str, dst_url_str,
    # self.sorted_src_urls_it, and self.sorted_dst_urls_it are not yet
    # processed. Each time we encounter None in src_url_str or dst_url_str we
    # populate from the respective iterator, and we reset one or the other value
    # to None after yielding an action that disposes of that URL.
    while not self.sorted_src_urls_it.IsEmpty() or src_url_str is not None:
      if src_url_str is None:
        (src_url_str, src_size, src_crc32c, src_md5) = self._ParseTmpFileLine(
            self.sorted_src_urls_it.next())
        # Skip past base URL and normalize slashes so we can compare across
        # clouds/file systems (including Windows).
        src_url_str_to_check = src_url_str[base_src_url_len:].replace('\\', '/')
        dst_url_str_would_copy_to = copy_helper.ConstructDstUrl(
            self.base_src_url, StorageUrlFromString(src_url_str), True, True,
            True, self.base_dst_url, False,
            self.recursion_requested).GetUrlString()
      if self.sorted_dst_urls_it.IsEmpty():
        # We've reached end of dst URLs, so copy src to dst.
        yield _DiffToApply(
            src_url_str, dst_url_str_would_copy_to, _DiffAction.COPY)
        src_url_str = None
        continue
      if not dst_url_str:
        (dst_url_str, dst_size, dst_crc32c, dst_md5) = (
            self._ParseTmpFileLine(self.sorted_dst_urls_it.next()))
        # Skip past base URL and normalize slashes so we can compare across
        # clouds/file systems (including Windows).
        dst_url_str_to_check = dst_url_str[base_dst_url_len:].replace('\\', '/')

      if src_url_str_to_check < dst_url_str_to_check:
        # There's no dst object corresponding to src object, so copy src to dst.
        yield _DiffToApply(
            src_url_str, dst_url_str_would_copy_to, _DiffAction.COPY)
        src_url_str = None
      elif src_url_str_to_check > dst_url_str_to_check:
        # dst object without a corresponding src object, so remove dst if -d
        # option was specified.
        if self.delete_extras:
          yield _DiffToApply(None, dst_url_str, _DiffAction.REMOVE)
        dst_url_str = None
      else:
        # There is a dst object corresponding to src object, so check if objects
        # match.
        if self._ObjectsMatch(
            src_url_str, src_size, src_crc32c, src_md5,
            dst_url_str, dst_size, dst_crc32c, dst_md5):
          # Continue iterating without yielding a _DiffToApply.
          src_url_str = None
          dst_url_str = None
        else:
          yield _DiffToApply(src_url_str, dst_url_str, _DiffAction.COPY)
          # Both URLs are consumed either way; reset both so the next
          # iteration refills them (leaving src_url_str set would yield a
          # second COPY for the same src, violating the invariant above).
          src_url_str = None
          dst_url_str = None

    # If -d option specified any files/objects left in dst iteration should be
    # removed.
    if not self.delete_extras:
      return
    if dst_url_str:
      yield _DiffToApply(None, dst_url_str, _DiffAction.REMOVE)
      dst_url_str = None
    for line in self.sorted_dst_urls_it:
      (dst_url_str, _, _, _) = self._ParseTmpFileLine(line)
      yield _DiffToApply(None, dst_url_str, _DiffAction.REMOVE)
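
The CleanUpTempFiles method in this example exists because tempfile.NamedTemporaryFile cannot be reopened by name on Windows while the handle is held, so the code falls back to tempfile.mkstemp and deletes the files by hand. A self-contained sketch of that pattern (the function name is illustrative):

import os
import tempfile

def WriteAndReadBackTempFile(data):
  (fd, path) = tempfile.mkstemp(prefix='sketch-')
  os.close(fd)  # Close the low-level handle; reopen by name below.
  try:
    with open(path, 'w') as fp:
      fp.write(data)
    with open(path, 'r') as fp:  # Reopening by name also works on Windows.
      return fp.read()
  finally:
    os.unlink(path)  # mkstemp never deletes the file for you.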
Example #5
class _DiffIterator(object):
  """Iterator yielding sequence of _DiffToApply objects."""

  def __init__(self, command_obj, base_src_url, base_dst_url):
    self.command_obj = command_obj
    self.compute_file_checksums = command_obj.compute_file_checksums
    self.delete_extras = command_obj.delete_extras
    self.recursion_requested = command_obj.recursion_requested
    self.logger = self.command_obj.logger
    self.base_src_url = base_src_url
    self.base_dst_url = base_dst_url
    self.logger.info('Building synchronization state...')

    (src_fh, self.sorted_list_src_file_name) = tempfile.mkstemp(
        prefix='gsutil-rsync-src-')
    _tmp_files.append(self.sorted_list_src_file_name)
    (dst_fh, self.sorted_list_dst_file_name) = tempfile.mkstemp(
        prefix='gsutil-rsync-dst-')
    _tmp_files.append(self.sorted_list_dst_file_name)
    # Close the file handles; the file will be opened in write mode by
    # _ListUrlRootFunc.
    os.close(src_fh)
    os.close(dst_fh)

    # Build sorted lists of src and dst URLs in parallel. To do this, pass args
    # to _ListUrlRootFunc as tuple (base_url_str, out_filename, desc)
    # where base_url_str is the starting URL string for listing.
    args_iter = iter([
        (self.base_src_url.url_string, self.sorted_list_src_file_name,
         'source'),
        (self.base_dst_url.url_string, self.sorted_list_dst_file_name,
         'destination')
    ])

    # Contains error message from non-retryable listing failure.
    command_obj.non_retryable_listing_failures = 0
    shared_attrs = ['non_retryable_listing_failures']
    command_obj.Apply(_ListUrlRootFunc, args_iter, _RootListingExceptionHandler,
                      shared_attrs, arg_checker=DummyArgChecker,
                      parallel_operations_override=True,
                      fail_on_error=True)

    if command_obj.non_retryable_listing_failures:
      raise CommandException('Caught non-retryable exception - aborting rsync')

    self.sorted_list_src_file = open(self.sorted_list_src_file_name, 'r')
    self.sorted_list_dst_file = open(self.sorted_list_dst_file_name, 'r')

    # Wrap iterators in PluralityCheckableIterator so we can check emptiness.
    self.sorted_src_urls_it = PluralityCheckableIterator(
        iter(self.sorted_list_src_file))
    self.sorted_dst_urls_it = PluralityCheckableIterator(
        iter(self.sorted_list_dst_file))

  def _ParseTmpFileLine(self, line):
    """Parses output from _BuildTmpOutputLine.

    Parses into tuple:
      (URL, size, crc32c, md5)
    where crc32c and/or md5 can be _NA.

    Args:
      line: The line to parse.

    Returns:
      Parsed tuple: (url, size, crc32c, md5)
    """
    (encoded_url, size, crc32c, md5) = line.split()
    return (_DecodeUrl(encoded_url), int(size), crc32c, md5.strip())

  def _WarnIfMissingCloudHash(self, url_str, crc32c, md5):
    """Warns if given url_str is a cloud URL and is missing both crc32c and md5.

    Args:
      url_str: Destination URL string.
      crc32c: Destination CRC32c.
      md5: Destination MD5.

    Returns:
      True if issued warning.
    """
    # One known way this can currently happen is when rsync'ing objects larger
    # than 5 GB from S3 (for which the etag is not an MD5).
    if (StorageUrlFromString(url_str).IsCloudUrl()
        and crc32c == _NA and md5 == _NA):
      self.logger.warn(
          'Found no hashes to validate %s. Integrity cannot be assured without '
          'hashes.', url_str)
      return True
    return False

  def _ObjectsMatch(self, src_url_str, src_size, src_crc32c, src_md5,
                    dst_url_str, dst_size, dst_crc32c, dst_md5):
    """Returns True if src and dst objects are the same.

    Uses size plus whatever checksums are available.

    Args:
      src_url_str: Source URL string.
      src_size: Source size
      src_crc32c: Source CRC32c.
      src_md5: Source MD5.
      dst_url_str: Destination URL string.
      dst_size: Destination size
      dst_crc32c: Destination CRC32c.
      dst_md5: Destination MD5.

    Returns:
      True/False.
    """
    # Note: This function is called from __iter__, which is called from the
    # Command.Apply driver. Thus, all checksum computation will be run in a
    # single thread, which is good (having multiple threads concurrently
    # computing checksums would thrash the disk).
    if src_size != dst_size:
      return False
    if self.compute_file_checksums:
      (src_crc32c, src_md5, dst_crc32c, dst_md5) = _ComputeNeededFileChecksums(
          self.logger, src_url_str, src_size, src_crc32c, src_md5, dst_url_str,
          dst_size, dst_crc32c, dst_md5)
    if src_md5 != _NA and dst_md5 != _NA:
      self.logger.debug('Comparing md5 for %s and %s', src_url_str, dst_url_str)
      return src_md5 == dst_md5
    if src_crc32c != _NA and dst_crc32c != _NA:
      self.logger.debug(
          'Comparing crc32c for %s and %s', src_url_str, dst_url_str)
      return src_crc32c == dst_crc32c
    if not self._WarnIfMissingCloudHash(src_url_str, src_crc32c, src_md5):
      self._WarnIfMissingCloudHash(dst_url_str, dst_crc32c, dst_md5)
    # Without checksums to compare we depend only on basic size comparison.
    return True

  def __iter__(self):
    """Iterates over src/dst URLs and produces a _DiffToApply sequence.

    Yields:
      The _DiffToApply.
    """
    # Strip trailing slashes, if any, so we compute tail length against
    # consistent position regardless of whether trailing slashes were included
    # or not in URL.
    base_src_url_len = len(self.base_src_url.url_string.rstrip('/\\'))
    base_dst_url_len = len(self.base_dst_url.url_string.rstrip('/\\'))
    src_url_str = dst_url_str = None
    # Invariant: After each yield, the URLs in src_url_str, dst_url_str,
    # self.sorted_src_urls_it, and self.sorted_dst_urls_it are not yet
    # processed. Each time we encounter None in src_url_str or dst_url_str we
    # populate from the respective iterator, and we reset one or the other value
    # to None after yielding an action that disposes of that URL.
    while not self.sorted_src_urls_it.IsEmpty() or src_url_str is not None:
      if src_url_str is None:
        (src_url_str, src_size, src_crc32c, src_md5) = self._ParseTmpFileLine(
            self.sorted_src_urls_it.next())
        # Skip past base URL and normalize slashes so we can compare across
        # clouds/file systems (including Windows).
        src_url_str_to_check = _EncodeUrl(
            src_url_str[base_src_url_len:].replace('\\', '/'))
        dst_url_str_would_copy_to = copy_helper.ConstructDstUrl(
            self.base_src_url, StorageUrlFromString(src_url_str), True, True,
            self.base_dst_url, False, self.recursion_requested).url_string
      if self.sorted_dst_urls_it.IsEmpty():
        # We've reached end of dst URLs, so copy src to dst.
        yield _DiffToApply(
            src_url_str, dst_url_str_would_copy_to, _DiffAction.COPY)
        src_url_str = None
        continue
      if not dst_url_str:
        (dst_url_str, dst_size, dst_crc32c, dst_md5) = (
            self._ParseTmpFileLine(self.sorted_dst_urls_it.next()))
        # Skip past base URL and normalize slashes so we can compare across
        # clouds/file systems (including Windows).
        dst_url_str_to_check = _EncodeUrl(
            dst_url_str[base_dst_url_len:].replace('\\', '/'))

      if src_url_str_to_check < dst_url_str_to_check:
        # There's no dst object corresponding to src object, so copy src to dst.
        yield _DiffToApply(
            src_url_str, dst_url_str_would_copy_to, _DiffAction.COPY)
        src_url_str = None
      elif src_url_str_to_check > dst_url_str_to_check:
        # dst object without a corresponding src object, so remove dst if -d
        # option was specified.
        if self.delete_extras:
          yield _DiffToApply(None, dst_url_str, _DiffAction.REMOVE)
        dst_url_str = None
      else:
        # There is a dst object corresponding to src object, so check if objects
        # match.
        if self._ObjectsMatch(
            src_url_str, src_size, src_crc32c, src_md5,
            dst_url_str, dst_size, dst_crc32c, dst_md5):
          # Continue iterating without yielding a _DiffToApply.
          pass
        else:
          yield _DiffToApply(src_url_str, dst_url_str, _DiffAction.COPY)
        src_url_str = None
        dst_url_str = None

    # If -d option specified any files/objects left in dst iteration should be
    # removed.
    if not self.delete_extras:
      return
    if dst_url_str:
      yield _DiffToApply(None, dst_url_str, _DiffAction.REMOVE)
      dst_url_str = None
    for line in self.sorted_dst_urls_it:
      (dst_url_str, _, _, _) = self._ParseTmpFileLine(line)
      yield _DiffToApply(None, dst_url_str, _DiffAction.REMOVE)
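
All three _DiffIterator variants share the same comparison ladder in _ObjectsMatch: size first, then MD5 when both sides have one, then CRC32C, then size alone with a warning. Condensed to a pure function for clarity (the actual value of the _NA sentinel comes from the listing code; '-' below is only a stand-in, and the function name is invented):

_NA_SKETCH = '-'  # Stand-in for the module-level _NA sentinel.

def ObjectsMatchSketch(src_size, dst_size, src_crc32c, src_md5,
                       dst_crc32c, dst_md5):
  if src_size != dst_size:
    return False  # Sizes are always known; a mismatch is decisive.
  if src_md5 != _NA_SKETCH and dst_md5 != _NA_SKETCH:
    return src_md5 == dst_md5  # Prefer the strongest shared hash.
  if src_crc32c != _NA_SKETCH and dst_crc32c != _NA_SKETCH:
    return src_crc32c == dst_crc32c
  return True  # No shared hash: equal size is all we can check.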