Example #1
def _RsyncFunc(cls, diff_to_apply, thread_state=None):
  """Worker function for performing the actual copy and remove operations."""
  gsutil_api = GetCloudApiInstance(cls, thread_state=thread_state)
  dst_url_str = diff_to_apply.dst_url_str
  dst_url = StorageUrlFromString(dst_url_str)
  if diff_to_apply.diff_action == _DiffAction.REMOVE:
    if cls.dryrun:
      cls.logger.info('Would remove %s', dst_url)
    else:
      cls.logger.info('Removing %s', dst_url)
      if dst_url.IsFileUrl():
        os.unlink(dst_url.object_name)
      else:
        try:
          gsutil_api.DeleteObject(
              dst_url.bucket_name, dst_url.object_name,
              generation=dst_url.generation, provider=dst_url.scheme)
        except NotFoundException:
          # If the object happened to be deleted by an external process, this
          # is fine because it moves us closer to the desired state.
          pass
  elif diff_to_apply.diff_action == _DiffAction.COPY:
    src_url_str = diff_to_apply.src_url_str
    src_url = StorageUrlFromString(src_url_str)
    if cls.dryrun:
      cls.logger.info('Would copy %s to %s', src_url, dst_url)
    else:
      copy_helper.PerformCopy(cls.logger, src_url, dst_url, gsutil_api, cls,
                              _RsyncExceptionHandler,
                              headers=cls.headers)
  else:
    raise CommandException('Got unexpected DiffAction (%d)'
                           % diff_to_apply.diff_action)
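_RsyncFunc only reads three attributes of diff_to_apply. A minimal self-contained sketch of such a record, with the enum stubbed out (gsutil's real _DiffAction values and full field set are assumptions here):

import collections

class _DiffAction(object):  # stand-in; the real enum's values are assumed
  COPY, REMOVE = range(2)

RsyncDiffToApply = collections.namedtuple(
    'RsyncDiffToApply', ['src_url_str', 'dst_url_str', 'diff_action'])

# A REMOVE diff carries no source URL, matching the check in Example #7 below.
remove_diff = RsyncDiffToApply(None, 'gs://my-bucket/stale.txt',
                               _DiffAction.REMOVE)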
Example #2
def CreateWildcardIterator(url_str,
                           gsutil_api,
                           all_versions=False,
                           debug=0,
                           project_id=None,
                           ignore_symlinks=False):
    """Instantiate a WildcardIterator for the given URL string.

  Args:
    url_str: URL string naming wildcard object(s) to iterate.
    gsutil_api: Cloud storage interface.  Passed in for thread safety, also
                settable for testing/mocking.
    all_versions: If true, the iterator yields all versions of objects
                  matching the wildcard.  If false, yields just the live
                  object version.
    debug: Debug level to control debug output for iterator.
    project_id: Project id to use for bucket listings.
    ignore_symlinks: For FileUrls, ignore symlinks during iteration if true.

  Returns:
    A WildcardIterator that handles the requested iteration.
  """

    url = StorageUrlFromString(url_str)
    if url.IsFileUrl():
        return FileWildcardIterator(url,
                                    debug=debug,
                                    ignore_symlinks=ignore_symlinks)
    else:  # Cloud URL
        return CloudWildcardIterator(url,
                                     gsutil_api,
                                     all_versions=all_versions,
                                     debug=debug,
                                     project_id=project_id)
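A hypothetical call site, sketched under two assumptions: that FileWildcardIterator never touches gsutil_api (so None is safe for purely local iteration) and that IterAll is the generic iteration entry point.

# Hypothetical usage; the path, the None API handle, and the IterAll call
# pattern are assumptions for illustration, not documented contract.
iterator = CreateWildcardIterator('file:///tmp/data/*.csv', gsutil_api=None)
for blr in iterator.IterAll():
    print(blr.url_string)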
Example #3
def CreateWildcardIterator(url_str,
                           gsutil_api,
                           all_versions=False,
                           project_id=None,
                           ignore_symlinks=False,
                           logger=None):
    """Instantiate a WildcardIterator for the given URL string.

  Args:
    url_str: URL string naming wildcard object(s) to iterate.
    gsutil_api: Cloud storage interface.  Passed in for thread safety, also
                settable for testing/mocking.
    all_versions: If true, the iterator yields all versions of objects
                  matching the wildcard.  If false, yields just the live
                  object version.
    project_id: Project id to use for bucket listings.
    ignore_symlinks: For FileUrls, ignore symlinks during iteration if true.
    logger: logging.Logger used for outputting debug messages during iteration.
            If None, the root logger will be used.

  Returns:
    A WildcardIterator that handles the requested iteration.
  """

    url = StorageUrlFromString(url_str)
    logger = logger or logging.getLogger()
    if url.IsFileUrl():
        return FileWildcardIterator(url,
                                    ignore_symlinks=ignore_symlinks,
                                    logger=logger)
    else:  # Cloud URL
        return CloudWildcardIterator(url,
                                     gsutil_api,
                                     all_versions=all_versions,
                                     project_id=project_id)
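Note that this variant threads logger through to FileWildcardIterator only; the cloud branch does not take it. A hedged sketch of passing a named logger for local iteration:

import logging

logging.basicConfig(level=logging.DEBUG)
# Hypothetical call: a file URL so the logger is actually used; gsutil_api=None
# is an assumption that holds only for local-only iteration.
iterator = CreateWildcardIterator('file:///var/log/*.log',
                                  gsutil_api=None,
                                  logger=logging.getLogger('wildcard'))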
Example #4
    def test_storage_url_from_string(self):
        storage_url = StorageUrlFromString('abc')
        self.assertTrue(storage_url.IsFileUrl())
        self.assertEqual('abc', storage_url.object_name)

        storage_url = StorageUrlFromString('file://abc/123')
        self.assertTrue(storage_url.IsFileUrl())
        self.assertEqual('abc/123', storage_url.object_name)

        storage_url = StorageUrlFromString('gs://abc/123')
        self.assertTrue(storage_url.IsCloudUrl())
        self.assertEqual('abc', storage_url.bucket_name)
        self.assertEqual('123', storage_url.object_name)

        storage_url = StorageUrlFromString('s3://abc/123')
        self.assertTrue(storage_url.IsCloudUrl())
        self.assertEqual('abc', storage_url.bucket_name)
        self.assertEqual('123', storage_url.object_name)
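gsutil URL strings may also carry an object generation after '#'; a hedged extension in the same test style (the exact parse behavior and the string typing of the attribute are assumptions here, not taken from the test above):

    def test_storage_url_with_generation(self):
        # Assumed parse behavior: 'gs://bucket/object#gen' splits off a
        # generation attribute as a string.
        storage_url = StorageUrlFromString('gs://abc/123#456')
        self.assertTrue(storage_url.IsCloudUrl())
        self.assertEqual('123', storage_url.object_name)
        self.assertEqual('456', storage_url.generation)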
Example #5
def _ComputeNeededFileChecksums(logger, src_url_str, src_size, src_crc32c,
                                src_md5, dst_url_str, dst_size, dst_crc32c,
                                dst_md5):
  """Computes any file checksums needed by _ObjectsMatch.

  Args:
    logger: logging.logger for outputting log messages.
    src_url_str: Source URL string.
    src_size: Source size.
    src_crc32c: Source CRC32c.
    src_md5: Source MD5.
    dst_url_str: Destination URL string.
    dst_size: Destination size.
    dst_crc32c: Destination CRC32c.
    dst_md5: Destination MD5.

  Returns:
    (src_crc32c, src_md5, dst_crc32c, dst_md5)
  """
  src_url = StorageUrlFromString(src_url_str)
  dst_url = StorageUrlFromString(dst_url_str)
  if src_url.IsFileUrl():
    if dst_crc32c != _NA or dst_url.IsFileUrl():
      if src_size > TEN_MIB:
        logger.info('Computing CRC32C for %s...', src_url_str)
      with open(src_url.object_name, 'rb') as fp:
        src_crc32c = CalculateB64EncodedCrc32cFromContents(fp)
    elif dst_md5 != _NA or dst_url.IsFileUrl():
      if src_size > TEN_MIB:
        logger.info('Computing MD5 for %s...', src_url_str)
      with open(src_url.object_name, 'rb') as fp:
        src_md5 = CalculateB64EncodedMd5FromContents(fp)
  if dst_url.IsFileUrl():
    if src_crc32c != _NA:
      if dst_size > TEN_MIB:
        logger.info('Computing CRC32C for %s...', dst_url_str)
      with open(dst_url.object_name, 'rb') as fp:
        dst_crc32c = CalculateB64EncodedCrc32cFromContents(fp)
    elif src_md5 != _NA:
      if dst_size > TEN_MIB:
        logger.info('Computing MD5 for %s...', dst_url_str)
      with open(dst_url.object_name, 'rb') as fp:
        dst_md5 = CalculateB64EncodedMd5FromContents(fp)
  return (src_crc32c, src_md5, dst_crc32c, dst_md5)
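Both Calculate* helpers stream file contents and return a base64-encoded digest. A standalone sketch of the MD5 variant using only the standard library (the function name is mine; the CRC32c counterpart would need a third-party library such as crcmod):

import base64
import hashlib

def b64_md5_of_file(path, chunk_size=8 * 1024 * 1024):
  """Streams a file in chunks and returns its MD5 digest, base64-encoded."""
  md5 = hashlib.md5()
  with open(path, 'rb') as fp:
    for chunk in iter(lambda: fp.read(chunk_size), b''):
      md5.update(chunk)
  return base64.b64encode(md5.digest()).decode('ascii')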
Example #6
  def RunCommand(self):
    """Command entry point for the hash command."""
    (calc_crc32c, calc_md5, format_func, cloud_format_func, output_format) = (
        self._ParseOpts(self.sub_opts, self.logger))

    matched_one = False
    for url_str in self.args:
      for file_ref in self.WildcardIterator(
          url_str).IterObjects(bucket_listing_fields=['crc32c', 'md5Hash',
                                                      'customerEncryption',
                                                      'size']):
        matched_one = True
        url = StorageUrlFromString(url_str)
        file_name = file_ref.storage_url.object_name
        if url.IsFileUrl():
          file_size = os.path.getsize(file_name)
          self.gsutil_api.status_queue.put(
              FileMessage(url, None, time.time(), size=file_size,
                          finished=False, message_type=FileMessage.FILE_HASH))
          callback_processor = ProgressCallbackWithTimeout(
              file_size, FileProgressCallbackHandler(
                  self.gsutil_api.status_queue,
                  src_url=url,
                  operation_name='Hashing').call)
          hash_dict = self._GetHashClassesFromArgs(calc_crc32c, calc_md5)
          with open(file_name, 'rb') as fp:
            CalculateHashesFromContents(fp, hash_dict,
                                        callback_processor=callback_processor)
          self.gsutil_api.status_queue.put(
              FileMessage(url, None, time.time(), size=file_size, finished=True,
                          message_type=FileMessage.FILE_HASH))
        else:
          hash_dict = {}
          obj_metadata = file_ref.root_object
          file_size = obj_metadata.size
          md5_present = obj_metadata.md5Hash is not None
          crc32c_present = obj_metadata.crc32c is not None
          if not md5_present and not crc32c_present:
            logging.getLogger().warning('No hashes present for %s', url_str)
            continue
          if md5_present:
            hash_dict['md5'] = obj_metadata.md5Hash
          if crc32c_present:
            hash_dict['crc32c'] = obj_metadata.crc32c
        print('Hashes [%s] for %s:' % (output_format, file_name))
        for name, digest in hash_dict.items():
          print('\tHash (%s):\t\t%s' % (name,
                                        (format_func(digest) if url.IsFileUrl()
                                         else cloud_format_func(digest))))

    if not matched_one:
      raise CommandException('No files matched')
    PutToQueueWithTimeout(self.gsutil_api.status_queue,
                          FinalMessage(time.time()))
    return 0
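format_func and cloud_format_func come from _ParseOpts and convert digests for display. A plausible hex formatter for a base64 digest, sketched as an assumption about what such a function does rather than gsutil's actual implementation:

import base64
import binascii

def b64_to_hex(b64_digest):
  """Decodes a base64 digest and re-encodes it as lowercase hex."""
  return binascii.hexlify(base64.b64decode(b64_digest)).decode('ascii')

# b64_to_hex('1B2M2Y8AsgTpgAmY7PhCfg==') == 'd41d8cd98f00b204e9800998ecf8427e'
# (the MD5 of an empty file, shown in both encodings).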
Example #7
def _DiffToApplyArgChecker(command_instance, diff_to_apply):
  """Arg checker that skips symlinks if -e flag specified."""
  if (diff_to_apply.diff_action == _DiffAction.REMOVE
      or not command_instance.exclude_symlinks):
    # No src URL is populated for REMOVE actions.
    return True
  exp_src_url = StorageUrlFromString(diff_to_apply.src_url_str)
  if exp_src_url.IsFileUrl() and os.path.islink(exp_src_url.object_name):
    command_instance.logger.info('Skipping symbolic link %s...', exp_src_url)
    return False
  return True
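The whole symlink filter rests on os.path.islink, which is true only for the link itself, never for its target. A tiny standalone demo with hypothetical paths:

import os

if not os.path.lexists('/tmp/hostname-link'):    # hypothetical paths
  os.symlink('/etc/hostname', '/tmp/hostname-link')
print(os.path.islink('/tmp/hostname-link'))      # True: the link itself
print(os.path.islink('/etc/hostname'))           # False: a regular file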
Example #8
def HaveFileUrls(args_to_check):
    """Checks whether args_to_check contain any file URLs.

  Args:
    args_to_check: Command-line argument subset to check.

  Returns:
    True if args_to_check contains any file URLs.
  """
    for url_str in args_to_check:
        storage_url = StorageUrlFromString(url_str)
        if storage_url.IsFileUrl():
            return True
    return False
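Because bare paths without a scheme parse as file URLs (see Example #4), any plain filename in the argument list trips this check. Hypothetical calls:

HaveFileUrls(['gs://bucket/obj', 's3://bucket/obj'])  # False: all cloud URLs
HaveFileUrls(['gs://bucket/obj', 'notes.txt'])        # True: bare path is a file URL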
Example #9
    def RunCommand(self):
        """Command entry point for the ls command."""
        got_nomatch_errors = False
        got_bucket_nomatch_errors = False
        listing_style = ListingStyle.SHORT
        get_bucket_info = False
        self.recursion_requested = False
        self.all_versions = False
        self.include_etag = False
        self.human_readable = False
        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-e':
                    self.include_etag = True
                elif o == '-b':
                    get_bucket_info = True
                elif o == '-h':
                    self.human_readable = True
                elif o == '-l':
                    listing_style = ListingStyle.LONG
                elif o == '-L':
                    listing_style = ListingStyle.LONG_LONG
                elif o == '-p':
                    self.project_id = a
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True

        if not self.args:
            # default to listing all gs buckets
            self.args = ['gs://']

        total_objs = 0
        total_bytes = 0

        def MaybePrintBucketHeader(blr):
            if len(self.args) > 1:
                print('%s:' % blr.url_string)

        print_bucket_header = MaybePrintBucketHeader

        for url_str in self.args:
            storage_url = StorageUrlFromString(url_str)
            if storage_url.IsFileUrl():
                raise CommandException('Only cloud URLs are supported for %s' %
                                       self.command_name)
            bucket_fields = None
            if (listing_style == ListingStyle.SHORT
                    or listing_style == ListingStyle.LONG):
                bucket_fields = ['id']
            elif listing_style == ListingStyle.LONG_LONG:
                bucket_fields = [
                    'location', 'storageClass', 'versioning', 'acl',
                    'defaultObjectAcl', 'website', 'logging', 'cors',
                    'lifecycle'
                ]
            if storage_url.IsProvider():
                # Provider URL: use bucket wildcard to list buckets.
                for blr in self.WildcardIterator(
                        '%s://*' % storage_url.scheme).IterBuckets(
                            bucket_fields=bucket_fields):
                    self._PrintBucketInfo(blr, listing_style)
            elif storage_url.IsBucket() and get_bucket_info:
                # ls -b bucket listing request: List info about bucket(s).
                total_buckets = 0
                for blr in self.WildcardIterator(url_str).IterBuckets(
                        bucket_fields=bucket_fields):
                    if not ContainsWildcard(url_str) and not blr.root_object:
                        # Iterator does not make an HTTP call for non-wildcarded
                        # listings with fields=='id'. Ensure the bucket exists by calling
                        # GetBucket.
                        self.gsutil_api.GetBucket(blr.storage_url.bucket_name,
                                                  fields=['id'],
                                                  provider=storage_url.scheme)
                    self._PrintBucketInfo(blr, listing_style)
                    total_buckets += 1
                if not ContainsWildcard(url_str) and not total_buckets:
                    got_bucket_nomatch_errors = True
            else:
                # URL names a bucket, object, or object subdir ->
                # list matching object(s) / subdirs.
                def _PrintPrefixLong(blr):
                    print('%-33s%s' % ('', blr.url_string))

                if listing_style == ListingStyle.SHORT:
                    # ls helper by default readies us for a short listing.
                    ls_helper = LsHelper(
                        self.WildcardIterator,
                        self.logger,
                        all_versions=self.all_versions,
                        print_bucket_header_func=print_bucket_header,
                        should_recurse=self.recursion_requested)
                elif listing_style == ListingStyle.LONG:
                    bucket_listing_fields = ['name', 'updated', 'size']
                    if self.all_versions:
                        bucket_listing_fields.extend(
                            ['generation', 'metageneration'])
                    if self.include_etag:
                        bucket_listing_fields.append('etag')

                    ls_helper = LsHelper(
                        self.WildcardIterator,
                        self.logger,
                        print_object_func=self._PrintLongListing,
                        print_dir_func=_PrintPrefixLong,
                        print_bucket_header_func=print_bucket_header,
                        all_versions=self.all_versions,
                        should_recurse=self.recursion_requested,
                        fields=bucket_listing_fields)

                elif listing_style == ListingStyle.LONG_LONG:
                    # List all fields
                    bucket_listing_fields = None
                    ls_helper = LsHelper(
                        self.WildcardIterator,
                        self.logger,
                        print_object_func=PrintFullInfoAboutObject,
                        print_dir_func=_PrintPrefixLong,
                        print_bucket_header_func=print_bucket_header,
                        all_versions=self.all_versions,
                        should_recurse=self.recursion_requested,
                        fields=bucket_listing_fields)
                else:
                    raise CommandException('Unknown listing style: %s' %
                                           listing_style)

                exp_dirs, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(
                    storage_url)
                if storage_url.IsObject() and exp_objs == 0 and exp_dirs == 0:
                    got_nomatch_errors = True
                total_bytes += exp_bytes
                total_objs += exp_objs

        if total_objs and listing_style != ListingStyle.SHORT:
            print('TOTAL: %d objects, %d bytes (%s)' %
                  (total_objs, total_bytes,
                   MakeHumanReadable(float(total_bytes))))
        if got_nomatch_errors:
            raise CommandException('One or more URLs matched no objects.')
        if got_bucket_nomatch_errors:
            raise NotFoundException(
                'One or more bucket URLs matched no buckets.')

        return 0
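MakeHumanReadable scales the byte total into binary units for the TOTAL line. A standalone sketch of that behavior; gsutil's exact suffixes and rounding are assumptions here:

def make_human_readable(num_bytes):
    """Scales a byte count to the largest binary unit, e.g. 1536 -> '1.50 KiB'."""
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB']
    size = float(num_bytes)
    i = 0
    while size >= 1024 and i < len(suffixes) - 1:
        size /= 1024.0
        i += 1
    return '%.2f %s' % (size, suffixes[i])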
Example #10
    def RunCommand(self):
        """Command entry point for the du command."""
        self.line_ending = '\n'
        self.all_versions = False
        self.produce_total = False
        self.human_readable = False
        self.summary_only = False
        self.exclude_patterns = []
        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-0':
                    self.line_ending = '\0'
                elif o == '-a':
                    self.all_versions = True
                elif o == '-c':
                    self.produce_total = True
                elif o == '-e':
                    self.exclude_patterns.append(a)
                elif o == '-h':
                    self.human_readable = True
                elif o == '-s':
                    self.summary_only = True
                elif o == '-X':
                    if a == '-':
                        f = sys.stdin
                    else:
                        f = open(a, 'r')
                    try:
                        for line in f:
                            line = line.strip()
                            if line:
                                self.exclude_patterns.append(line)
                    finally:
                        f.close()

        if not self.args:
            # Default to listing all gs buckets.
            self.args = ['gs://']

        total_bytes = 0
        got_nomatch_errors = False

        def _PrintObjectLong(blr):
            return self._PrintInfoAboutBucketListingRef(blr)

        def _PrintNothing(unused_blr=None):
            pass

        def _PrintDirectory(num_bytes, name):
            if not self.summary_only:
                self._PrintSummaryLine(num_bytes, name)

        for url_arg in self.args:
            top_level_storage_url = StorageUrlFromString(url_arg)
            if top_level_storage_url.IsFileUrl():
                raise CommandException('Only cloud URLs are supported for %s' %
                                       self.command_name)
            bucket_listing_fields = ['size']

            ls_helper = LsHelper(self.WildcardIterator,
                                 self.logger,
                                 print_object_func=_PrintObjectLong,
                                 print_dir_func=_PrintNothing,
                                 print_dir_header_func=_PrintNothing,
                                 print_dir_summary_func=_PrintDirectory,
                                 print_newline_func=_PrintNothing,
                                 all_versions=self.all_versions,
                                 should_recurse=True,
                                 exclude_patterns=self.exclude_patterns,
                                 fields=bucket_listing_fields)

            # ls_helper expands to objects and prefixes, so perform a top-level
            # expansion first.
            if top_level_storage_url.IsProvider():
                # Provider URL: use bucket wildcard to iterate over all buckets.
                top_level_iter = self.WildcardIterator(
                    '%s://*' % top_level_storage_url.scheme).IterBuckets(
                        bucket_fields=['id'])
            elif top_level_storage_url.IsBucket():
                top_level_iter = self.WildcardIterator(
                    '%s://%s' %
                    (top_level_storage_url.scheme,
                     top_level_storage_url.bucket_name)).IterBuckets(
                         bucket_fields=['id'])
            else:
                top_level_iter = [BucketListingObject(top_level_storage_url)]

            for blr in top_level_iter:
                storage_url = blr.storage_url
                if storage_url.IsBucket() and self.summary_only:
                    storage_url = StorageUrlFromString(
                        storage_url.CreatePrefixUrl(wildcard_suffix='**'))
                _, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(
                    storage_url)
                if (storage_url.IsObject() and exp_objs == 0
                        and ContainsWildcard(url_arg)
                        and not self.exclude_patterns):
                    got_nomatch_errors = True
                total_bytes += exp_bytes

                if self.summary_only:
                    self._PrintSummaryLine(exp_bytes,
                                           blr.url_string.rstrip('/'))

        if self.produce_total:
            self._PrintSummaryLine(total_bytes, 'total')

        if got_nomatch_errors:
            raise CommandException('One or more URLs matched no objects.')

        return 0
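Each -e or -X pattern excludes matching names during expansion. A hedged sketch of such a filter using shell-style matching; whether gsutil uses fnmatch or regular expressions here is an assumption, and the helper name is mine:

import fnmatch

def is_excluded(url_string, exclude_patterns):
    """Returns True if url_string matches any shell-style exclude pattern."""
    return any(fnmatch.fnmatch(url_string, p) for p in exclude_patterns)

# Hypothetical: is_excluded('gs://my-bucket/tmp/a.dat', ['gs://my-bucket/tmp/*'])
# evaluates to True, so du would skip the object.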