Example 1
0
  def RunCommand(self):
    """Command entry point for the compose command.

    The last URL argument is the composition destination; all preceding
    (possibly wildcarded) URL arguments name the component objects.
    """
    destination_str, self.args = self.args[-1], self.args[:-1]
    destination_url = StorageUrlFromString(destination_str)
    self.CheckProvider(destination_url)
    if destination_url.HasGeneration():
      raise CommandException('A version-specific URL (%s) cannot be '
                             'the destination for gsutil compose - abort.'
                             % destination_url)

    dst_obj_metadata = apitools_messages.Object(
        name=destination_url.object_name, bucket=destination_url.bucket_name)

    components = []
    # Track the first source object; its content type is copied to the
    # destination below.
    first_src_url = None
    for src_str in self.args:
      if ContainsWildcard(src_str):
        listing_iter = self.WildcardIterator(src_str).IterObjects()
      else:
        listing_iter = [BucketListingObject(StorageUrlFromString(src_str))]
      for listing_ref in listing_iter:
        src_url = listing_ref.storage_url
        self.CheckProvider(src_url)

        if src_url.bucket_name != destination_url.bucket_name:
          raise CommandException(
              'GCS does not support inter-bucket composing.')

        if first_src_url is None:
          first_src_url = src_url

        entry = apitools_messages.ComposeRequest.SourceObjectsValueListEntry(
            name=src_url.object_name)
        if src_url.HasGeneration():
          entry.generation = src_url.generation
        components.append(entry)
        # Guard against wildcard expansion producing too many components;
        # check after each append so we stop as soon as the limit is hit.
        if len(components) > MAX_COMPOSE_ARITY:
          raise CommandException('"compose" called with too many component '
                                 'objects. Limit is %d.' % MAX_COMPOSE_ARITY)

    if not components:
      raise CommandException('"compose" requires at least 1 component object.')

    # The composed object inherits the first component's content type.
    first_obj_metadata = self.gsutil_api.GetObjectMetadata(
        first_src_url.bucket_name, first_src_url.object_name,
        provider=first_src_url.scheme, fields=['contentType'])
    dst_obj_metadata.contentType = first_obj_metadata.contentType

    preconditions = PreconditionsFromHeaders(self.headers or {})

    self.logger.info(
        'Composing %s from %d component object(s).',
        destination_url, len(components))
    self.gsutil_api.ComposeObject(
        components, dst_obj_metadata,
        preconditions=preconditions,
        provider=destination_url.scheme,
        encryption_tuple=GetEncryptionKeyWrapper(config))
Example 2
0
  def _EnumerateStorageUrls(self, in_urls):
    """Expands each URL string into one or more StorageUrl objects.

    Wildcarded strings are expanded via the wildcard iterator; plain
    strings are converted directly.

    Args:
      in_urls: Iterable of URL strings.

    Returns:
      List of StorageUrl objects.
    """
    expanded = []
    for candidate in in_urls:
      if ContainsWildcard(candidate):
        expanded += [blr.storage_url
                     for blr in self.WildcardIterator(candidate)]
      else:
        expanded.append(StorageUrlFromString(candidate))
    return expanded
Example 3
0
 def RunCommand(self):
     """Command entry point for stat command.

     Prints full metadata for each matching object URL argument (when INFO
     logging is enabled) and tracks whether every argument matched at
     least one object.

     Returns:
       0 if every URL argument matched at least one object, 1 otherwise.
     """
     # Request hash fields up front; if the object turns out to be
     # customer-encrypted we retry below with the unencrypted field set.
     stat_fields = ENCRYPTED_FIELDS + UNENCRYPTED_FULL_LISTING_FIELDS
     found_nonmatching_arg = False
     for url_str in self.args:
         arg_matches = 0
         url = StorageUrlFromString(url_str)
         if not url.IsObject():
             raise CommandException(
                 'The stat command only works with object URLs')
         try:
             if ContainsWildcard(url_str):
                 blr_iter = self.WildcardIterator(url_str).IterObjects(
                     bucket_listing_fields=stat_fields)
             else:
                 # Non-wildcarded URL: fetch the single object's metadata
                 # directly instead of performing a listing.
                 try:
                     single_obj = self.gsutil_api.GetObjectMetadata(
                         url.bucket_name,
                         url.object_name,
                         generation=url.generation,
                         provider=url.scheme,
                         fields=stat_fields)
                 except EncryptionException:
                     # Retry without requesting hashes.
                     single_obj = self.gsutil_api.GetObjectMetadata(
                         url.bucket_name,
                         url.object_name,
                         generation=url.generation,
                         provider=url.scheme,
                         fields=UNENCRYPTED_FULL_LISTING_FIELDS)
                 blr_iter = [
                     BucketListingObject(url, root_object=single_obj)
                 ]
             for blr in blr_iter:
                 if blr.IsObject():
                     arg_matches += 1
                     # TODO: Request fewer fields if we're not printing the object.
                     if logging.getLogger().isEnabledFor(logging.INFO):
                         PrintFullInfoAboutObject(blr, incl_acl=False)
         except AccessDeniedException:
             # Permission errors are reported (at INFO) but don't abort
             # processing of the remaining URL arguments.
             if logging.getLogger().isEnabledFor(logging.INFO):
                 sys.stderr.write(
                     'You aren\'t authorized to read %s - skipping' %
                     url_str)
         except InvalidUrlError:
             raise
         except NotFoundException:
             # Missing objects are reported below via arg_matches.
             pass
         if not arg_matches:
             if logging.getLogger().isEnabledFor(logging.INFO):
                 sys.stderr.write(NO_URLS_MATCHED_TARGET % url_str)
             found_nonmatching_arg = True
     if found_nonmatching_arg:
         return 1
     return 0
Example 4
0
    def _ExpandBucketWildcards(self, bucket_fields=None):
        """Expands bucket and provider wildcards.

    Builds a list of bucket url strings that can be iterated on.

    Args:
      bucket_fields: If present, populate only these metadata fields for
                     buckets.  Example value: ['acl', 'defaultObjectAcl']

    Yields:
      BucketListingReferences of type BUCKET.
    """
        wildcard_url = self.wildcard_url
        bucket_url = StorageUrlFromString(wildcard_url.bucket_url_string)
        bucket_is_literal = not ContainsWildcard(wildcard_url.bucket_name)
        wants_only_id = bool(bucket_fields) and set(bucket_fields) == {'id'}

        if wants_only_id and bucket_is_literal:
            # Only the name of a non-wildcarded bucket is needed, so no RPC
            # is required.
            yield BucketListingBucket(bucket_url)
        elif wildcard_url.IsBucket() and bucket_is_literal:
            # Non-wildcarded bucket URL: fetch exactly that bucket.
            root_object = self.gsutil_api.GetBucket(
                wildcard_url.bucket_name,
                provider=wildcard_url.scheme,
                fields=bucket_fields)
            yield BucketListingBucket(bucket_url, root_object=root_object)
        else:
            # Wildcarded bucket name: list all buckets and keep the ones
            # whose ids match the (fnmatch-translated) pattern.
            prog = re.compile(fnmatch.translate(wildcard_url.bucket_name))

            fields = self._GetToListFields(bucket_fields)
            if fields:
                fields.add('items/id')
            for bucket in self.gsutil_api.ListBuckets(
                    fields=fields,
                    project_id=self.project_id,
                    provider=wildcard_url.scheme):
                if prog.match(bucket.id):
                    matched_url = StorageUrlFromString(
                        '%s://%s/' % (wildcard_url.scheme, bucket.id))
                    yield BucketListingBucket(matched_url, root_object=bucket)
Example 5
0
 def RunCommand(self):
     """Command entry point for stat command.

     Prints full metadata for each matching object URL argument (when INFO
     logging is enabled) and tracks whether every argument matched at
     least one object.

     Returns:
       0 if every URL argument matched at least one object, 1 otherwise.
     """
     # List of fields we'll print for stat objects.
     stat_fields = [
         'updated', 'cacheControl', 'contentDisposition', 'contentEncoding',
         'contentLanguage', 'size', 'contentType', 'componentCount',
         'metadata', 'crc32c', 'md5Hash', 'etag', 'generation',
         'metageneration'
     ]
     found_nonmatching_arg = False
     for url_str in self.args:
         arg_matches = 0
         url = StorageUrlFromString(url_str)
         if not url.IsObject():
             raise CommandException(
                 'The stat command only works with object URLs')
         try:
             if ContainsWildcard(url_str):
                 blr_iter = self.WildcardIterator(url_str).IterObjects(
                     bucket_listing_fields=stat_fields)
             else:
                 # Non-wildcarded URL: fetch the single object's metadata
                 # directly instead of performing a listing.
                 single_obj = self.gsutil_api.GetObjectMetadata(
                     url.bucket_name,
                     url.object_name,
                     generation=url.generation,
                     provider=url.scheme,
                     fields=stat_fields)
                 blr_iter = [
                     BucketListingObject(url, root_object=single_obj)
                 ]
             for blr in blr_iter:
                 if blr.IsObject():
                     arg_matches += 1
                     if logging.getLogger().isEnabledFor(logging.INFO):
                         PrintFullInfoAboutObject(blr, incl_acl=False)
         except AccessDeniedException:
             # Permission errors are reported (at INFO) but don't abort
             # processing of the remaining URL arguments.
             if logging.getLogger().isEnabledFor(logging.INFO):
                 sys.stderr.write(
                     'You aren\'t authorized to read %s - skipping' %
                     url_str)
         except InvalidUrlError:
             raise
         except NotFoundException:
             # Missing objects are reported below via arg_matches.
             pass
         if not arg_matches:
             if logging.getLogger().isEnabledFor(logging.INFO):
                 sys.stderr.write('No URLs matched %s' % url_str)
             found_nonmatching_arg = True
     if found_nonmatching_arg:
         return 1
     return 0
Example 6
0
    def RunCommand(self):
        """Command entry point for the ls command.

        Parses the ls sub-options, then lists buckets and/or objects for
        each URL argument, accumulating totals and no-match conditions.

        Returns:
          0 on success.

        Raises:
          CommandException: If a file URL is supplied, the listing style is
            unknown, or one or more object URLs matched no objects.
          NotFoundException: If one or more bucket URLs matched no buckets.
        """
        got_nomatch_errors = False
        got_bucket_nomatch_errors = False
        listing_style = ListingStyle.SHORT
        get_bucket_info = False
        self.recursion_requested = False
        self.all_versions = False
        self.include_etag = False
        self.human_readable = False
        # Parse command-line sub-options.
        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-e':
                    self.include_etag = True
                elif o == '-b':
                    get_bucket_info = True
                elif o == '-h':
                    self.human_readable = True
                elif o == '-l':
                    listing_style = ListingStyle.LONG
                elif o == '-L':
                    listing_style = ListingStyle.LONG_LONG
                elif o == '-p':
                    self.project_id = a
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True

        if not self.args:
            # default to listing all gs buckets
            self.args = ['gs://']

        total_objs = 0
        total_bytes = 0

        def MaybePrintBucketHeader(blr):
            # Only print a per-bucket header when listing multiple URLs.
            if len(self.args) > 1:
                print '%s:' % blr.url_string.encode(UTF8)

        print_bucket_header = MaybePrintBucketHeader

        for url_str in self.args:
            storage_url = StorageUrlFromString(url_str)
            if storage_url.IsFileUrl():
                raise CommandException('Only cloud URLs are supported for %s' %
                                       self.command_name)
            # Choose which bucket metadata fields to request, based on the
            # listing style.
            bucket_fields = None
            if (listing_style == ListingStyle.SHORT
                    or listing_style == ListingStyle.LONG):
                bucket_fields = ['id']
            elif listing_style == ListingStyle.LONG_LONG:
                bucket_fields = [
                    'location', 'storageClass', 'versioning', 'acl',
                    'defaultObjectAcl', 'website', 'logging', 'cors',
                    'lifecycle'
                ]
            if storage_url.IsProvider():
                # Provider URL: use bucket wildcard to list buckets.
                for blr in self.WildcardIterator(
                        '%s://*' % storage_url.scheme).IterBuckets(
                            bucket_fields=bucket_fields):
                    self._PrintBucketInfo(blr, listing_style)
            elif storage_url.IsBucket() and get_bucket_info:
                # ls -b bucket listing request: List info about bucket(s).
                total_buckets = 0
                for blr in self.WildcardIterator(url_str).IterBuckets(
                        bucket_fields=bucket_fields):
                    if not ContainsWildcard(url_str) and not blr.root_object:
                        # Iterator does not make an HTTP call for non-wildcarded
                        # listings with fields=='id'. Ensure the bucket exists by calling
                        # GetBucket.
                        self.gsutil_api.GetBucket(blr.storage_url.bucket_name,
                                                  fields=['id'],
                                                  provider=storage_url.scheme)
                    self._PrintBucketInfo(blr, listing_style)
                    total_buckets += 1
                if not ContainsWildcard(url_str) and not total_buckets:
                    got_bucket_nomatch_errors = True
            else:
                # URL names a bucket, object, or object subdir ->
                # list matching object(s) / subdirs.
                def _PrintPrefixLong(blr):
                    print '%-33s%s' % ('', blr.url_string.encode(UTF8))

                if listing_style == ListingStyle.SHORT:
                    # ls helper by default readies us for a short listing.
                    ls_helper = LsHelper(
                        self.WildcardIterator,
                        self.logger,
                        all_versions=self.all_versions,
                        print_bucket_header_func=print_bucket_header,
                        should_recurse=self.recursion_requested)
                elif listing_style == ListingStyle.LONG:
                    # Long listing: only request the fields we will print.
                    bucket_listing_fields = ['name', 'updated', 'size']
                    if self.all_versions:
                        bucket_listing_fields.extend(
                            ['generation', 'metageneration'])
                    if self.include_etag:
                        bucket_listing_fields.append('etag')

                    ls_helper = LsHelper(
                        self.WildcardIterator,
                        self.logger,
                        print_object_func=self._PrintLongListing,
                        print_dir_func=_PrintPrefixLong,
                        print_bucket_header_func=print_bucket_header,
                        all_versions=self.all_versions,
                        should_recurse=self.recursion_requested,
                        fields=bucket_listing_fields)

                elif listing_style == ListingStyle.LONG_LONG:
                    # List all fields
                    bucket_listing_fields = None
                    ls_helper = LsHelper(
                        self.WildcardIterator,
                        self.logger,
                        print_object_func=PrintFullInfoAboutObject,
                        print_dir_func=_PrintPrefixLong,
                        print_bucket_header_func=print_bucket_header,
                        all_versions=self.all_versions,
                        should_recurse=self.recursion_requested,
                        fields=bucket_listing_fields)
                else:
                    raise CommandException('Unknown listing style: %s' %
                                           listing_style)

                exp_dirs, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(
                    storage_url)
                if storage_url.IsObject() and exp_objs == 0 and exp_dirs == 0:
                    got_nomatch_errors = True
                total_bytes += exp_bytes
                total_objs += exp_objs

        if total_objs and listing_style != ListingStyle.SHORT:
            print('TOTAL: %d objects, %d bytes (%s)' %
                  (total_objs, total_bytes,
                   MakeHumanReadable(float(total_bytes))))
        if got_nomatch_errors:
            raise CommandException('One or more URLs matched no objects.')
        if got_bucket_nomatch_errors:
            raise NotFoundException(
                'One or more bucket URLs matched no buckets.')

        return 0
Example 7
0
    def __iter__(self,
                 bucket_listing_fields=None,
                 expand_top_level_buckets=False):
        """Iterator that gets called when iterating over the cloud wildcard.

    In the case where no wildcard is present, returns a single matching object,
    single matching prefix, or one of each if both exist.

    Args:
      bucket_listing_fields: Iterable fields to include in bucket listings.
                             Ex. ['name', 'acl'].  Iterator is
                             responsible for converting these to list-style
                             format ['items/name', 'items/acl'] as well as
                             adding any fields necessary for listing such as
                             prefixes.  API implementation is responsible for
                             adding pagination fields.  If this is None,
                             all fields are returned.
      expand_top_level_buckets: If true, yield no BUCKET references.  Instead,
                                expand buckets into top-level objects and
                                prefixes.

    Yields:
      BucketListingRef of type BUCKET, OBJECT or PREFIX.
    """
        # True when the wildcard URL pins a specific object generation.
        single_version_request = self.wildcard_url.HasGeneration()

        # For wildcard expansion purposes, we need at a minimum the name of
        # each object and prefix.  If we're not using the default of requesting
        # all fields, make sure at least these are requested.  The Cloud API
        # tolerates specifying the same field twice.
        get_fields = None
        if bucket_listing_fields:
            get_fields = set()
            for field in bucket_listing_fields:
                get_fields.add(field)
            bucket_listing_fields = self._GetToListFields(
                get_fields=bucket_listing_fields)
            bucket_listing_fields.update(['items/name', 'prefixes'])
            get_fields.update(['name'])
            # If we're making versioned requests, ensure generation and
            # metageneration are also included.
            if single_version_request or self.all_versions:
                bucket_listing_fields.update(
                    ['items/generation', 'items/metageneration'])
                get_fields.update(['generation', 'metageneration'])

        # Handle bucket wildcarding, if any, in _ExpandBucketWildcards. Then
        # iterate over the expanded bucket strings and handle any object
        # wildcarding.
        for bucket_listing_ref in self._ExpandBucketWildcards(
                bucket_fields=['id']):
            bucket_url_string = bucket_listing_ref.url_string
            if self.wildcard_url.IsBucket():
                # IsBucket() guarantees there are no prefix or object wildcards, and
                # thus this is a top-level listing of buckets.
                if expand_top_level_buckets:
                    url = StorageUrlFromString(bucket_url_string)
                    for obj_or_prefix in self.gsutil_api.ListObjects(
                            url.bucket_name,
                            delimiter='/',
                            all_versions=self.all_versions,
                            provider=self.wildcard_url.scheme,
                            fields=bucket_listing_fields):
                        if obj_or_prefix.datatype == CloudApi.CsObjectOrPrefixType.OBJECT:
                            yield self._GetObjectRef(
                                bucket_url_string,
                                obj_or_prefix.data,
                                with_version=self.all_versions)
                        else:  # CloudApi.CsObjectOrPrefixType.PREFIX:
                            yield self._GetPrefixRef(bucket_url_string,
                                                     obj_or_prefix.data)
                else:
                    yield bucket_listing_ref
            else:
                # By default, assume a non-wildcarded URL is an object, not a prefix.
                # This prevents unnecessary listings (which are slower, more expensive,
                # and also subject to eventual consistency).
                if (not ContainsWildcard(self.wildcard_url.url_string)
                        and self.wildcard_url.IsObject()
                        and not self.all_versions):
                    try:
                        get_object = self.gsutil_api.GetObjectMetadata(
                            self.wildcard_url.bucket_name,
                            self.wildcard_url.object_name,
                            generation=self.wildcard_url.generation,
                            provider=self.wildcard_url.scheme,
                            fields=get_fields)
                        yield self._GetObjectRef(
                            self.wildcard_url.bucket_url_string,
                            get_object,
                            with_version=(self.all_versions
                                          or single_version_request))
                        # Exactly one object can match a non-wildcarded URL,
                        # so we're done.
                        return
                    except (NotFoundException, AccessDeniedException):
                        # It's possible this is a prefix - try to list instead.
                        pass

                # Expand iteratively by building prefix/delimiter bucket listing
                # request, filtering the results per the current level's wildcard
                # (if present), and continuing with the next component of the
                # wildcard. See _BuildBucketFilterStrings() documentation for details.
                if single_version_request:
                    url_string = '%s%s#%s' % (bucket_url_string,
                                              self.wildcard_url.object_name,
                                              self.wildcard_url.generation)
                else:
                    # Rstrip any prefixes to correspond with rstripped prefix wildcard
                    # from _BuildBucketFilterStrings().
                    url_string = '%s%s' % (
                        bucket_url_string,
                        StripOneSlash(self.wildcard_url.object_name) or '/'
                    )  # Cover root object named '/' case.
                # Worklist of URL strings still containing wildcard components
                # to expand, processed breadth-first.
                urls_needing_expansion = [url_string]
                while urls_needing_expansion:
                    url = StorageUrlFromString(urls_needing_expansion.pop(0))
                    (prefix, delimiter, prefix_wildcard,
                     suffix_wildcard) = (self._BuildBucketFilterStrings(
                         url.object_name))
                    # fnmatch.translate anchors the pattern, so match() acts
                    # as a full-string match against the wildcard.
                    prog = re.compile(fnmatch.translate(prefix_wildcard))

                    # If we have a suffix wildcard, we only care about listing prefixes.
                    listing_fields = (set(['prefixes']) if suffix_wildcard else
                                      bucket_listing_fields)

                    # List bucket for objects matching prefix up to delimiter.
                    for obj_or_prefix in self.gsutil_api.ListObjects(
                            url.bucket_name,
                            prefix=prefix,
                            delimiter=delimiter,
                            all_versions=self.all_versions
                            or single_version_request,
                            provider=self.wildcard_url.scheme,
                            fields=listing_fields):
                        if obj_or_prefix.datatype == CloudApi.CsObjectOrPrefixType.OBJECT:
                            gcs_object = obj_or_prefix.data
                            if prog.match(gcs_object.name):
                                if not suffix_wildcard or (StripOneSlash(
                                        gcs_object.name) == suffix_wildcard):
                                    if not single_version_request or (
                                            self._SingleVersionMatches(
                                                gcs_object.generation)):
                                        yield self._GetObjectRef(
                                            bucket_url_string,
                                            gcs_object,
                                            with_version=(
                                                self.all_versions
                                                or single_version_request))
                        else:  # CloudApi.CsObjectOrPrefixType.PREFIX
                            prefix = obj_or_prefix.data

                            if ContainsWildcard(prefix):
                                # TODO: Disambiguate user-supplied strings from iterated
                                # prefix and object names so that we can better reason
                                # about wildcards and handle this case without raising an error.
                                raise CommandException(
                                    'Cloud folder %s%s contains a wildcard; gsutil does '
                                    'not currently support objects with wildcards in their '
                                    'name.' % (bucket_url_string, prefix))

                            # If the prefix ends with a slash, remove it.  Note that we only
                            # remove one slash so that we can successfully enumerate dirs
                            # containing multiple slashes.
                            rstripped_prefix = StripOneSlash(prefix)
                            if prog.match(rstripped_prefix):
                                if suffix_wildcard and rstripped_prefix != suffix_wildcard:
                                    # There's more wildcard left to expand.
                                    url_append_string = '%s%s' % (
                                        bucket_url_string, rstripped_prefix +
                                        '/' + suffix_wildcard)
                                    urls_needing_expansion.append(
                                        url_append_string)
                                else:
                                    # No wildcard to expand, just yield the prefix
                                    yield self._GetPrefixRef(
                                        bucket_url_string, prefix)
Example 8
0
 def testContainsWildcard(self):
     """Verifies ContainsWildcard on wildcarded and plain strings."""
     for wildcarded in ('a*.txt', 'a[0-9].txt', '?.txt'):
         self.assertTrue(ContainsWildcard(wildcarded))
     # Characters like '-' and '.' alone are not wildcards.
     self.assertFalse(ContainsWildcard('0-9.txt'))
Example 9
0
    def RunCommand(self):
        """Command entry point for the du command.

        Parses du sub-options, expands each URL argument (defaulting to all
        gs:// buckets), and prints per-object and per-directory size
        information via LsHelper.

        Returns:
          0 on success.

        Raises:
          CommandException: If a file URL is supplied or one or more URLs
            matched no objects.
        """
        self.line_ending = '\n'
        self.all_versions = False
        self.produce_total = False
        self.human_readable = False
        self.summary_only = False
        self.exclude_patterns = []
        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-0':
                    self.line_ending = '\0'
                elif o == '-a':
                    self.all_versions = True
                elif o == '-c':
                    self.produce_total = True
                elif o == '-e':
                    self.exclude_patterns.append(a)
                elif o == '-h':
                    self.human_readable = True
                elif o == '-s':
                    self.summary_only = True
                elif o == '-X':
                    # Read exclude patterns from a file ('-' means stdin).
                    if a == '-':
                        f = sys.stdin
                    else:
                        f = open(a, 'r')
                    try:
                        for line in f:
                            line = line.strip()
                            if line:
                                self.exclude_patterns.append(line)
                    finally:
                        # Only close files this command opened. Closing
                        # sys.stdin here (as the previous code did) would
                        # break any later reads from stdin by this process.
                        if f is not sys.stdin:
                            f.close()

        if not self.args:
            # Default to listing all gs buckets.
            self.args = ['gs://']

        total_bytes = 0
        got_nomatch_errors = False

        def _PrintObjectLong(blr):
            # Prints one size line for an object listing ref.
            return self._PrintInfoAboutBucketListingRef(blr)

        def _PrintNothing(unused_blr=None):
            # Suppresses output (directories, headers, newlines) that du
            # does not display.
            pass

        def _PrintDirectory(num_bytes, name):
            # Prints a per-directory summary unless -s (summary only) is set.
            if not self.summary_only:
                self._PrintSummaryLine(num_bytes, name)

        for url_arg in self.args:
            top_level_storage_url = StorageUrlFromString(url_arg)
            if top_level_storage_url.IsFileUrl():
                raise CommandException('Only cloud URLs are supported for %s' %
                                       self.command_name)
            # du only needs object sizes from listings.
            bucket_listing_fields = ['size']

            ls_helper = LsHelper(self.WildcardIterator,
                                 self.logger,
                                 print_object_func=_PrintObjectLong,
                                 print_dir_func=_PrintNothing,
                                 print_dir_header_func=_PrintNothing,
                                 print_dir_summary_func=_PrintDirectory,
                                 print_newline_func=_PrintNothing,
                                 all_versions=self.all_versions,
                                 should_recurse=True,
                                 exclude_patterns=self.exclude_patterns,
                                 fields=bucket_listing_fields)

            # ls_helper expands to objects and prefixes, so perform a top-level
            # expansion first.
            if top_level_storage_url.IsProvider():
                # Provider URL: use bucket wildcard to iterate over all buckets.
                top_level_iter = self.WildcardIterator(
                    '%s://*' % top_level_storage_url.scheme).IterBuckets(
                        bucket_fields=['id'])
            elif top_level_storage_url.IsBucket():
                top_level_iter = self.WildcardIterator(
                    '%s://%s' %
                    (top_level_storage_url.scheme,
                     top_level_storage_url.bucket_name)).IterBuckets(
                         bucket_fields=['id'])
            else:
                top_level_iter = [BucketListingObject(top_level_storage_url)]

            for blr in top_level_iter:
                storage_url = blr.storage_url
                if storage_url.IsBucket() and self.summary_only:
                    # With -s on a bucket, expand everything beneath it with a
                    # '**' wildcard so only the single summary line is printed.
                    storage_url = StorageUrlFromString(
                        storage_url.CreatePrefixUrl(wildcard_suffix='**'))
                _, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(
                    storage_url)
                if (storage_url.IsObject() and exp_objs == 0
                        and ContainsWildcard(url_arg)
                        and not self.exclude_patterns):
                    got_nomatch_errors = True
                total_bytes += exp_bytes

                if self.summary_only:
                    self._PrintSummaryLine(exp_bytes,
                                           blr.url_string.rstrip('/'))

        if self.produce_total:
            self._PrintSummaryLine(total_bytes, 'total')

        if got_nomatch_errors:
            raise CommandException('One or more URLs matched no objects.')

        return 0