Esempio n. 1
0
    def GetAclCommandHelper(self):
        """Prints the ACL of exactly one bucket or object as indented XML.

        The default object ACL is fetched when self.command_name is
        'getdefacl'; the standard ACL is fetched for any other command name.

        Raises:
          CommandException: if the argument matches no URI, matches more than
              one URI, or names neither a bucket nor an object.
        """
        uri_arg = self.args[0]
        if ContainsWildcard(uri_arg):
            # Wildcard argument: it must resolve to exactly one URI.
            matched = list(self.WildcardIterator(uri_arg).IterUris())
            if not matched:
                raise CommandException('No URIs matched')
            if len(matched) != 1:
                raise CommandException(
                    '%s matched more than one URI, which is not '
                    'allowed by the %s command' %
                    (uri_arg, self.command_name))
            uri = matched[0]
        else:
            # Build the URI directly rather than going through
            # WildcardIterator().IterUris(), which would lose the versioning
            # info of a version-specific URI.
            uri = self.suri_builder.StorageUri(uri_arg)

        if not (uri.names_bucket() or uri.names_object()):
            raise CommandException('"%s" command must specify a bucket or '
                                   'object.' % self.command_name)

        if self.command_name == 'getdefacl':
            fetch_acl = uri.get_def_acl
        else:
            fetch_acl = uri.get_acl
        acl = fetch_acl(False, self.headers)

        # Pretty-print the XML to make it more easily human editable.
        acl_xml = acl.to_xml().encode('utf-8')
        acl_dom = xml.dom.minidom.parseString(acl_xml)
        print(acl_dom.toprettyxml(indent='    ').encode('utf-8'))
Esempio n. 2
0
    def _ErrorCheckCopyRequest(self, src_uri_expansion, dst_uri_str):
        """Checks copy request for problems, and builds needed base_dst_uri.

        base_dst_uri is the base uri to be used if it's a multi-object copy,
        e.g., the URI for the destination bucket. The actual dst_uri can then
        be constructed from the src_uri and this base_dst_uri.

        Args:
          src_uri_expansion: result from ExpandWildcardsAndContainers call
              (used here as a mapping from each source URI to the list of
              URIs it expanded to).
          dst_uri_str: string representation of destination StorageUri.

        Returns:
          (base_dst_uri to use for copy, bool indicator of multi-source
          request).

        Raises:
          CommandException: if a source URI names only a provider, the
              destination wildcard matches zero or several URIs, there is
              nothing to copy, or a copy would overwrite its own source.
        """
        for src_uri in src_uri_expansion:
            if src_uri.is_cloud_uri() and not src_uri.bucket_name:
                # Interpolate the offending URI; the message previously
                # carried a literal, unformatted '%s'.
                raise CommandException(
                    'Provider-only src_uri (%s)' % src_uri)

        if ContainsWildcard(dst_uri_str):
            matches = list(self.CmdWildcardIterator(dst_uri_str))
            if not matches:
                # Report a clean command error instead of an IndexError on
                # matches[0] below.
                raise CommandException(
                    'Destination (%s) matches no URIs' % dst_uri_str)
            if len(matches) > 1:
                raise CommandException(
                    'Destination (%s) matches more than 1 URI' % dst_uri_str)
            base_dst_uri = matches[0]
        else:
            base_dst_uri = self.StorageUri(dst_uri_str)

        # Make sure entire expansion didn't result in nothing to copy. This
        # can happen if user request copying a directory w/o -r option, for
        # example.
        expansions = list(src_uri_expansion.values())
        if not any(expansions):
            raise CommandException('Nothing to copy')

        # If multi-object copy request ensure base_dst_uri names a container.
        # expansions is non-empty here because any() passed above.
        multi_src_request = (len(src_uri_expansion) > 1
                             or len(expansions[0]) > 1)
        if multi_src_request:
            self.InsistUriNamesContainer(base_dst_uri, self.command_name)

        # Ensure no src/dest pairs would overwrite src. Note that this is
        # more restrictive than the UNIX 'cp' command (which would, for
        # example, allow "mv * dir" and just skip the implied mv dir dir). We
        # disallow such partial completion operations in cloud copies because
        # they are risky.
        for src_uri in src_uri_expansion:
            for exp_src_uri in src_uri_expansion[src_uri]:
                new_dst_uri = self._ConstructDstUri(src_uri, exp_src_uri,
                                                    base_dst_uri)
                if self._SrcDstSame(exp_src_uri, new_dst_uri):
                    raise CommandException(
                        'cp: "%s" and "%s" are the same object - '
                        'abort.' % (exp_src_uri.uri, new_dst_uri.uri))

        return (base_dst_uri, multi_src_request)
Esempio n. 3
0
    def __iter__(self):
        """Iterates over the bucket listing matches for self.blr.

        Reads self.blr (the reference to expand) and self.command_instance
        (whose WildcardIterator performs the listing).

        Yields:
          Each item produced by the wildcard iteration whose right-stripped
          URI string matches the one being expanded.
        """
        target = self.blr.GetRStrippedUriString()
        if ContainsWildcard(target):
            # Already a wildcard: a delimited expansion returns every match
            # together with whether it is a key or a prefix. E.g. for a bucket
            # holding key 'abcd' and key 'abce/x.txt' the expansion gives two
            # results, the first with HasKey()=True and the second with
            # HasPrefix()=True.
            for match in self.command_instance.WildcardIterator(target):
                yield match
        else:
            # Append '*' so CloudWildcardIterator does a real bucket listing
            # instead of treating the string as a key and echoing it back.
            for candidate in self.command_instance.WildcardIterator(
                    target + '*'):
                # The trailing '*' also matches names that merely start with
                # the requested name, so keep exact matches only. More than
                # one can survive when an object and a subdirectory share the
                # same name.
                if candidate.GetRStrippedUriString() == target:
                    yield candidate
Esempio n. 4
0
File: cat.py Progetto: jkff/gsutil
 def _UriIterator(self, uri_str):
     """Generates the URI(s) that uri_str refers to.

     Args:
       uri_str: URI string, possibly containing wildcards.

     Yields:
       Every match from the wildcard iterator when uri_str contains a
       wildcard, otherwise the single URI built from uri_str.
     """
     if ContainsWildcard(uri_str):
         for matched_uri in self.WildcardIterator(uri_str).IterUris():
             yield matched_uri
         return
     yield self.suri_builder.StorageUri(uri_str)
Esempio n. 5
0
    def SetAclCommandHelper(self):
        """Common logic for setting ACLs.

        Interprets self.args[0] either as the path of a file containing XML
        ACL text (parsed into an ACL object) or, when no such file exists, as
        the string name of a canned ACL. The remaining args are the URIs the
        ACL applies to.

        Raises:
          CommandException: if the URIs span providers, a wildcard-named
              bucket matches nothing, or the ACL file is not valid XML.
        """
        acl_arg = self.args[0]
        uri_args = self.args[1:]
        # Disallow multi-provider setacl requests, because there are
        # differences in the ACL models.
        storage_uri = self.UrisAreForSingleProvider(uri_args)
        if not storage_uri:
            raise CommandException(
                '"%s" command spanning providers not allowed.' %
                self.command_name)

        # Get ACL object from connection for one URI, for interpreting the
        # ACL. This won't fail because the main startup code insists on at
        # least 1 arg for this command.
        acl_class = storage_uri.acl_class()
        # NOTE(review): canned_acls and the final acl_arg are not consumed in
        # the code visible here; the method appears to continue beyond this
        # point, so both are kept.
        canned_acls = storage_uri.canned_acls()

        # Determine whether acl_arg names a file containing XML ACL text vs.
        # the string name of a canned ACL.
        if os.path.isfile(acl_arg):
            # The context manager closes the file even if read() raises.
            with open(acl_arg, 'r') as acl_file:
                acl_txt = acl_file.read()
            acl_obj = acl_class()
            # Handle wildcard-named bucket.
            if ContainsWildcard(storage_uri.bucket_name):
                try:
                    bucket_uri = next(self.WildcardIterator(
                        storage_uri.clone_replace_name('')).IterUris())
                except StopIteration:
                    raise CommandException('No URIs matched')
            else:
                bucket_uri = storage_uri
            h = handler.XmlHandler(acl_obj, bucket_uri.get_bucket())
            try:
                xml.sax.parseString(acl_txt, h)
            except xml.sax.SAXParseException as e:
                raise CommandException(
                    'Requested ACL is invalid: %s at line %s, '
                    'column %s' %
                    (e.getMessage(), e.getLineNumber(), e.getColumnNumber()))
            acl_arg = acl_obj
Esempio n. 6
0
    def RunCommand(self):
        """Moves objects by running 'cp' on all args, then 'rm' on the sources.

        Raises:
          CommandException: if the first argument names a bucket or
              directory.
        """
        # Refuse to delete a bucket or directory src URI (force users to
        # explicitly do that as a separate operation).
        first_src_uri = self.StorageUri(self.args[0])
        if first_src_uri.names_container():
            raise CommandException(
                'Will not remove source buckets or directories. '
                'You must separately copy and remove for that '
                'purpose.')

        if len(self.args) > 2:
            dst_uri = self.StorageUri(self.args[-1])
            self.InsistUriNamesContainer(dst_uri, self.command_name)

        # Wildcards are expanded up front, before cp and rm run. Otherwise,
        # starting with a bucket containing only gs://bucket/obj, the command
        #   gsutil mv gs://bucket/* gs://bucket/d.txt
        # would copy gs://bucket/obj to gs://bucket/d.txt and then the rm
        # step would remove that freshly written object as well.
        expanded_args = []
        for arg in self.args:
            arg_uri = self.StorageUri(arg)
            if not ContainsWildcard(arg):
                expanded_args.append(arg_uri.uri)
                continue
            expanded_args += [
                str(match) for match in self.CmdWildcardIterator(arg_uri)
            ]

        run_named = self.command_runner.RunNamedCommand
        run_named('cp', expanded_args, self.headers, self.debug,
                  self.parallel_operations)
        # Everything except the last (destination) argument is a source.
        run_named('rm', expanded_args[:-1], self.headers, self.debug,
                  self.parallel_operations)
Esempio n. 7
0
    def RunCommand(self):
        """Moves objects by running 'cp -M' and then 'rm' on the sources.

        Sets self.recursion_requested to True when any source URI names a
        container, in which case -R is passed to both cp and rm.

        Raises:
          CommandException: if a source argument names a bucket or directory,
              a source expands to a file:// URI, nothing matched, or a source
              directory is named using a wildcard.
        """
        # Every argument except the last is a source. Refuse to delete a
        # bucket or directory src URI (force users to explicitly do that as a
        # separate operation).
        for src_arg in self.args[:-1]:
            if self.suri_builder.StorageUri(src_arg).names_container():
                raise CommandException(
                    'Will not remove source buckets or directories '
                    '(%s).\nYou must separately copy and remove for '
                    'that purpose.' % src_arg)

        # Expand wildcards, dirs, buckets, and bucket subdirs before running
        # cp and rm. Otherwise, starting with a bucket containing only
        # gs://bucket/obj, the command
        #   gsutil mv gs://bucket/* gs://bucket/d.txt
        # would copy gs://bucket/obj to gs://bucket/d.txt and the rm command
        # would then remove that object too.
        # Note 1: This is somewhat inefficient, since we request a bucket
        # listing here and then again in the generated cp command. TODO:
        # Consider adding an internal interface to cp command to allow this
        # expansion to be passed in.
        # Note 2: The True argument requests recursion while expanding, so we
        # can determine whether any source URI is a directory (and then use
        # cp -R and rm -R, matching UNIX mv, where moving a directory moves
        # all the contained files).
        src_uri_expansion = self.exp_handler.ExpandWildcardsAndContainers(
            self.args[:-1], True)
        rm_args = list(src_uri_expansion.IterExpandedUriStrings())

        # file:// sources are disallowed. FILE_URIS_OK can't simply be set to
        # False in command_spec because a file URI *is* allowed as the dest:
        # users may move data out of the cloud to the local disk, but data is
        # never deleted off the local disk by this command.
        if self.HaveFileUris(rm_args):
            raise CommandException(
                '"mv" command does not support "file://" URIs for '
                'source arguments.\nDid you mean to use a '
                'gs:// URI?')

        if src_uri_expansion.IsEmpty():
            raise CommandException('No URIs matched')

        # If any of the src URIs are directories, -R is passed to cp and rm.
        self.recursion_requested = False
        for src_uri in src_uri_expansion.GetSrcUris():
            if not src_uri_expansion.NamesContainer(src_uri):
                continue
            self.recursion_requested = True
            # Disallow wildcard src URIs when moving directories, as
            # supporting it would make the name transformation too complex and
            # would also be dangerous (e.g., someone could accidentally move
            # many objects to the wrong name, or accidentally overwrite many
            # objects).
            if ContainsWildcard(src_uri):
                raise CommandException(
                    'mv command disallows naming source directories using wildcards'
                )

        # cp receives the original command-line opts and args (so options
        # such as -p are picked up), prefixed with the undocumented,
        # internal-use-only -M option that requests move naming semantics
        # (see _ConstructDstUri in cp.py).
        cp_args = ['-M']
        if self.recursion_requested:
            cp_args.append('-R')
            rm_args.insert(0, '-R')
        cp_args.extend(self.unparsed_args)

        run_named = self.command_runner.RunNamedCommand
        run_named('cp', cp_args, self.headers, self.debug,
                  self.parallel_operations)
        # rm receives the pre-expanded source list, for the reason given in
        # the expansion comment above.
        run_named('rm', rm_args, self.headers, self.debug,
                  self.parallel_operations)
Esempio n. 8
0
    def _ExpandWildcardsAndContainers(self, uri_strs):
        """Expands URI wildcards, object-less bucket names and directory names.

        Examples:
          Calling with uri_strs='gs://bucket' will enumerate all contained
             objects.
          Calling with uri_strs='file:///tmp' will enumerate all files under
             /tmp (or under any subdirectory).
          The previous example is equivalent to uri_strs='file:///tmp/*'
             and to uri_strs='file:///tmp/**'

        Args:
          uri_strs: URI strings needing expansion

        Returns:
          dict mapping StorageUri -> list of StorageUri, for each input
          uri_str.

          A dict is built (rather than a generator that iterates
          incrementally) because the caller needs to know the count before
          iterating and performing copy operations, in order to determine if
          this is a multi-source copy request. That limits the scalability of
          wildcard iteration, since the entire list needs to fit in memory.
        """
        # Two passes are used:
        #   1. Build a first-level list: every URI that isn't a file wildcard,
        #      plus the expansion of each file wildcard.
        #   2. Build the result dict from that list.
        # The first pass is what makes this example work:
        #     gsutil cp file0 dir0 gs://bucket
        # where dir0 contains file1 and dir1/file2. Without it the expansion
        # would be
        #   {file://file0:[file://file0],file://dir0:[file://dir0/file1,
        #                                             file://dir0/dir1/file2]}
        # instead of the (correct)
        #   {file://file0:[file://file0],file://dir0/file1:[file://dir0/file1],
        #                                file://dir0/dir1:[file://dir0/dir1/file2]}
        # The latter lets the "Copying..." loop of CopyObjsCommand know dir0
        # was being copied, so it creates gs://bucket/dir0/dir1/file2 rather
        # than treating it as a single-file copy to gs://bucket/file2.

        recurse_flags = ('-r', '-R')
        should_recurse = False
        for opt, unused_arg in (self.sub_opts or ()):
            if opt in recurse_flags:
                should_recurse = True

        # Pass 1.
        first_level = []
        for uri_str in uri_strs:
            uri = self.StorageUri(uri_str)
            if uri.is_file_uri():
                if ContainsWildcard(uri_str):
                    first_level.extend(self.CmdWildcardIterator(uri))
                    continue
                if uri.is_stream():
                    # Special case for Streams: the stream is the entire
                    # result.
                    return {uri: [uri]}
            first_level.append(uri)

        # Pass 2.
        expansion = {}
        for uri in first_level:
            if not uri.names_container():
                pattern = uri
            elif should_recurse:
                if uri.is_file_uri():
                    # dir -> convert to implicit recursive wildcard.
                    pattern = '%s/**' % uri.uri
                else:
                    # bucket -> convert to implicit wildcard.
                    pattern = uri.clone_replace_name('*')
            else:
                # Containers are skipped unless recursion was requested.
                if uri.is_file_uri():
                    kind = 'directory'
                else:
                    kind = 'bucket'
                print('Omitting %s "%s".' % (kind, uri.uri))
                expansion[uri] = []
                continue
            expansion[uri] = list(self.CmdWildcardIterator(pattern))
        return expansion
Esempio n. 9
0
    def RunCommand(self):
        """Lists buckets and objects for each URI argument.

        Recognised sub-options: -b (bucket info), -l (long listing), -L
        (long-long listing), -p <id> (project id), -r/-R (recursive). With no
        arguments, 'gs://' is listed.

        Raises:
          CommandException: if a wildcard URI matched no objects.
        """
        listing_style = ListingStyle.SHORT
        get_bucket_info = False
        self.recursion_requested = False
        for opt, opt_arg in (self.sub_opts or ()):
            if opt == '-b':
                get_bucket_info = True
            elif opt == '-l':
                listing_style = ListingStyle.LONG
            elif opt == '-L':
                listing_style = ListingStyle.LONG_LONG
            elif opt == '-p':
                self.proj_id_handler.SetProjectId(opt_arg)
            elif opt in ('-r', '-R'):
                self.recursion_requested = True

        if not self.args:
            # default to listing all gs buckets
            self.args = ['gs://']

        got_nomatch_errors = False
        total_objs = 0
        total_bytes = 0
        for uri_str in self.args:
            uri = self.suri_builder.StorageUri(uri_str)
            self.proj_id_handler.FillInProjectHeaderIfNeeded(
                'ls', uri, self.headers)

            if uri.names_provider():
                # Provider URI: use bucket wildcard to list buckets.
                all_buckets = '%s://*' % uri.scheme
                for bucket_uri in self.WildcardIterator(
                        all_buckets).IterUris():
                    objs, nbytes = self._PrintBucketInfo(bucket_uri,
                                                         listing_style)
                    total_objs += objs
                    total_bytes += nbytes
            elif uri.names_bucket() and get_bucket_info:
                # ls -b bucket listing request: List info about bucket(s).
                for bucket_uri in self.WildcardIterator(uri).IterUris():
                    objs, nbytes = self._PrintBucketInfo(bucket_uri,
                                                         listing_style)
                    total_objs += objs
                    total_bytes += nbytes
            else:
                # Either a bucket URI without -b (list its objects), or a URI
                # naming an object / object subdir (list the matches).
                objs, nbytes = self._ExpandUriAndPrintInfo(
                    uri,
                    listing_style,
                    should_recurse=self.recursion_requested)
                if objs == 0 and ContainsWildcard(uri):
                    got_nomatch_errors = True
                total_objs += objs
                total_bytes += nbytes

        if total_objs and listing_style != ListingStyle.SHORT:
            print('TOTAL: %d objects, %d bytes (%s)' %
                  (total_objs, total_bytes,
                   MakeHumanReadable(float(total_bytes))))
        if got_nomatch_errors:
            raise CommandException('One or more URIs matched no objects.')
Esempio n. 10
0
    def _ExpandUriAndPrintInfo(self, uri, listing_style, should_recurse=False):
        """Expands uri as needed and prints a listing line for each match.

        Wildcards and directories/buckets are expanded, and
        _PrintInfoAboutBucketListingRef() is called on each expanded entry
        that has a key.

        Args:
          uri: StorageUri being listed.
          listing_style: ListingStyle enum describing type of output desired.
          should_recurse: bool indicator of whether to expand recursively.

        Returns:
          Tuple (number of matching objects, number of bytes across these
          objects).

        Raises:
          CommandException: if a user-provided URI without wildcards expands
              to nothing.
        """
        # We do a two-level loop, with the outer loop iterating level-by-level from
        # blrs_to_expand, and the inner loop iterating the matches at the current
        # level, printing them, and adding any new subdirs that need expanding to
        # blrs_to_expand (to be picked up in the next outer loop iteration).
        blrs_to_expand = [BucketListingRef(uri)]
        num_objs = 0
        num_bytes = 0
        # True only while the first queue entry (the one built from uri) is
        # being processed; cleared at the end of the first outer iteration.
        expanding_top_level = True
        # Becomes True once an object line or a 'subdir:' header has been
        # printed; used to emit a blank separator line before the next group.
        printed_one = False
        # Running count of entries seen across all inner-loop iterations.
        num_expanded_blrs = 0
        while len(blrs_to_expand):
            if printed_one:
                # Bare Python 2 print statement: emits an empty line.
                print
            # Entries are taken from the front, so they are expanded in the
            # order they were queued.
            blr = blrs_to_expand.pop(0)
            if blr.HasKey():
                blr_iterator = iter([blr])
            elif blr.HasPrefix():
                # Bucket subdir from a previous iteration. Print "header" line only if
                # we're listing more than one subdir (or if it's a recursive listing),
                # to be consistent with the way UNIX ls works.
                if num_expanded_blrs > 1 or should_recurse:
                    print '%s:' % blr.GetUriString().encode('utf-8')
                    printed_one = True
                blr_iterator = self.WildcardIterator(
                    '%s/*' % blr.GetRStrippedUriString())
            elif blr.NamesBucket():
                blr_iterator = self.WildcardIterator('%s*' %
                                                     blr.GetUriString())
            else:
                # This BLR didn't come from a bucket listing. This case happens for
                # BLR's instantiated from a user-provided URI.
                blr_iterator = PluralityCheckableIterator(
                    _UriOnlyBlrExpansionIterator(self, blr))
                # An empty expansion is an error only for wildcard-free URIs;
                # this method does not raise for wildcard URIs that match
                # nothing.
                if blr_iterator.is_empty() and not ContainsWildcard(uri):
                    raise CommandException('No such object %s' % uri)
            for cur_blr in blr_iterator:
                num_expanded_blrs = num_expanded_blrs + 1
                if cur_blr.HasKey():
                    # Object listing.
                    (no, nb) = self._PrintInfoAboutBucketListingRef(
                        cur_blr, listing_style)
                    num_objs += no
                    num_bytes += nb
                    printed_one = True
                else:
                    # Subdir listing. A subdir is queued for expansion (instead
                    # of having its name printed) when the listing is recursive,
                    # or when it was matched at the top level for a URI that
                    # does not name a bucket (corresponding to how UNIX ls dir
                    # just prints its contents, not the name followed by its
                    # contents).
                    if (expanding_top_level
                            and not uri.names_bucket()) or should_recurse:
                        if cur_blr.GetUriString().endswith('//'):
                            # Expand gs://bucket// into gs://bucket//* so we don't infinite
                            # loop. This case happens when user has uploaded an object whose
                            # name begins with a /.
                            cur_blr = BucketListingRef(
                                self.suri_builder.StorageUri(
                                    '%s*' % cur_blr.GetUriString()), None,
                                None, cur_blr.headers)
                        blrs_to_expand.append(cur_blr)
                    # Don't include the subdir name in the output if we're doing a
                    # recursive listing, as it will be printed as 'subdir:' when we get
                    # to the prefix expansion, the next iteration of the main loop.
                    else:
                        if listing_style == ListingStyle.LONG:
                            # Pad with 33 blank columns before the subdir name
                            # in long listings.
                            print '%-33s%s' % (
                                '', cur_blr.GetUriString().encode('utf-8'))
                        else:
                            print cur_blr.GetUriString().encode('utf-8')
            expanding_top_level = False
        return (num_objs, num_bytes)
Esempio n. 11
0
File: ls.py Progetto: jkff/gsutil
    def RunCommand(self):
        """Lists buckets and objects for each URI argument.

        Recognised sub-options: -a (sets self.all_versions), -e (sets
        self.include_etag), -b (bucket info), -h (sets self.human_readable),
        -l (long listing), -L (long-long listing), -p <id> (project id),
        -r/-R (recursive). With no arguments, 'gs://' is listed.

        Returns:
          0 on completion.

        Raises:
          CommandException: if a wildcard URI matched no objects.
        """
        listing_style = ListingStyle.SHORT
        get_bucket_info = False
        self.recursion_requested = False
        self.all_versions = False
        self.include_etag = False
        self.human_readable = False
        for opt, opt_arg in (self.sub_opts or ()):
            if opt == '-a':
                self.all_versions = True
            elif opt == '-e':
                self.include_etag = True
            elif opt == '-b':
                get_bucket_info = True
            elif opt == '-h':
                self.human_readable = True
            elif opt == '-l':
                listing_style = ListingStyle.LONG
            elif opt == '-L':
                listing_style = ListingStyle.LONG_LONG
            elif opt == '-p':
                self.proj_id_handler.SetProjectId(opt_arg)
            elif opt in ('-r', '-R'):
                self.recursion_requested = True

        if not self.args:
            # default to listing all gs buckets
            self.args = ['gs://']

        got_nomatch_errors = False
        total_objs = 0
        total_bytes = 0
        for uri_str in self.args:
            uri = self.suri_builder.StorageUri(uri_str)
            self.proj_id_handler.FillInProjectHeaderIfNeeded(
                'ls', uri, self.headers)

            if uri.names_provider():
                # Provider URI: use bucket wildcard to list buckets.
                all_buckets = '%s://*' % uri.scheme
                for bucket_uri in self.WildcardIterator(
                        all_buckets).IterUris():
                    self._PrintBucketInfo(bucket_uri, listing_style)
            elif uri.names_bucket() and get_bucket_info:
                # ls -b bucket listing request: List info about bucket(s).
                if not (listing_style == ListingStyle.LONG_LONG
                        or ContainsWildcard(uri)):
                    # Nothing so far has validated that the bucket URI
                    # actually exists. If the listing style is short, the
                    # _PrintBucketInfo doesn't do any RPCs, so check to make
                    # sure the bucket actually exists by fetching it.
                    uri.get_bucket(validate=True)

                for bucket_uri in self.WildcardIterator(uri).IterUris():
                    self._PrintBucketInfo(bucket_uri, listing_style)
            else:
                # Either a bucket URI without -b (list its objects), or a URI
                # naming an object / object subdir (list the matches).
                objs, nbytes = self._ExpandUriAndPrintInfo(
                    uri,
                    listing_style,
                    should_recurse=self.recursion_requested)
                if objs == 0 and ContainsWildcard(uri):
                    got_nomatch_errors = True
                total_objs += objs
                total_bytes += nbytes

        if total_objs and listing_style != ListingStyle.SHORT:
            print('TOTAL: %d objects, %d bytes (%s)' %
                  (total_objs, total_bytes,
                   MakeHumanReadable(float(total_bytes))))
        if got_nomatch_errors:
            raise CommandException('One or more URIs matched no objects.')

        return 0
Esempio n. 12
0
    def RunCommand(self):
        """Prints usage for each URI argument via _RecursePrint.

        Recognised sub-options: -0 (end lines with NUL instead of newline),
        -a (all versions), -c (print a grand total), -e <pattern> (add an
        exclude pattern), -h (human-readable), -s (one summary line per
        argument), -X <file> (read exclude patterns, one per line, from a
        file, or from stdin when the file is '-'). With no arguments, 'gs://'
        is used.

        Returns:
          0 on completion.

        Raises:
          CommandException: if a wildcard URI matched no objects while no
              exclude patterns were in effect.
        """
        self.line_ending = '\n'
        self.all_versions = False
        self.produce_total = False
        self.human_readable = False
        self.summary_only = False
        self.exclude_patterns = []
        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-0':
                    self.line_ending = '\0'
                elif o == '-a':
                    self.all_versions = True
                elif o == '-c':
                    self.produce_total = True
                elif o == '-e':
                    self.exclude_patterns.append(a)
                elif o == '-h':
                    self.human_readable = True
                elif o == '-s':
                    self.summary_only = True
                elif o == '-X':
                    if a == '-':
                        pattern_file = sys.stdin
                    else:
                        pattern_file = open(a, 'r')
                    try:
                        for line in pattern_file:
                            line = line.strip()
                            if line:
                                self.exclude_patterns.append(line)
                    finally:
                        # Only close files opened here. sys.stdin belongs to
                        # the process; closing it would make a second '-X -'
                        # (or any later stdin read) fail on a closed file.
                        if pattern_file is not sys.stdin:
                            pattern_file.close()

        if not self.args:
            # Default to listing all gs buckets.
            self.args = ['gs://']

        total_objs = 0
        total_bytes = 0
        got_nomatch_errors = False

        for uri_str in self.args:
            uri = self.suri_builder.StorageUri(uri_str)

            # Treat this as the ls command for this function.
            self.proj_id_handler.FillInProjectHeaderIfNeeded(
                'ls', uri, self.headers)

            # Bytes attributed to this argument only (for the -s summary).
            iter_bytes = 0
            if uri.names_provider():
                # Provider URI: use bucket wildcard to list buckets.
                for bucket_uri in self.WildcardIterator(
                        '%s://*' % uri.scheme).IterUris():
                    exp_objs, exp_bytes = self._RecursePrint(
                        BucketListingRef(bucket_uri))
                    iter_bytes += exp_bytes
                    total_objs += exp_objs
            else:
                exp_objs, exp_bytes = self._RecursePrint(BucketListingRef(uri))
                # An empty result is only treated as an error when no exclude
                # patterns are in effect.
                if (exp_objs == 0 and ContainsWildcard(uri)
                        and not self.exclude_patterns):
                    got_nomatch_errors = True
                iter_bytes += exp_bytes
                total_objs += exp_objs

            total_bytes += iter_bytes
            if self.summary_only:
                self._PrintSummaryLine(iter_bytes, uri_str)

        if self.produce_total:
            self._PrintSummaryLine(total_bytes, 'total')

        if got_nomatch_errors:
            raise CommandException('One or more URIs matched no objects.')

        return 0
Esempio n. 13
0
    def _RecursePrint(self, blr):
        """Walks a bucket listing reference and totals what lies beneath it.

        The reference is expanded into child references. Children matching
        any pattern in self.exclude_patterns are skipped. Children with a key
        are reported through _PrintInfoAboutBucketListingRef; all others are
        descended into recursively. When blr has a prefix and
        self.summary_only is not set, a summary line for blr is printed
        after its children.

        Args:
          blr: An instance of BucketListingRef.

        Returns:
          Tuple of (number of objects, total number of bytes).

        Raises:
          CommandException: if blr has no key, has no prefix, does not name
              a bucket, contains no wildcard, and expands to nothing.
        """
        if blr.HasKey():
            children = iter([blr])
        elif blr.HasPrefix():
            prefix_pattern = '%s/*' % blr.GetRStrippedUriString()
            children = self.WildcardIterator(
                prefix_pattern, all_versions=self.all_versions)
        elif blr.NamesBucket():
            bucket_pattern = '%s*' % blr.GetUriString()
            children = self.WildcardIterator(
                bucket_pattern, all_versions=self.all_versions)
        else:
            # Not the product of a bucket listing: this reference was built
            # directly from a URI the user supplied.
            expansion = UriOnlyBlrExpansionIterator(
                self, blr, all_versions=self.all_versions)
            children = PluralityCheckableIterator(expansion)
            if children.is_empty() and not ContainsWildcard(
                    blr.GetUriString()):
                raise CommandException(
                    'No such object %s' % blr.GetUriString())

        object_count = 0
        byte_count = 0

        for child in children:
            if self.exclude_patterns:
                candidate = child.GetUriString()
                if any(fnmatch.fnmatch(candidate, glob)
                       for glob in self.exclude_patterns):
                    continue

            if child.HasKey():
                # Object listing.
                child_objs, child_bytes = (
                    self._PrintInfoAboutBucketListingRef(child))
            else:
                # Subdir listing.
                subdir_str = child.GetUriString()
                if subdir_str.endswith('//'):
                    # Rewrite gs://bucket// as gs://bucket//* to avoid
                    # recursing forever. This arises when an uploaded object
                    # has a name that begins with a /.
                    starred_uri = self.suri_builder.StorageUri(
                        '%s*' % subdir_str)
                    child = BucketListingRef(
                        starred_uri, None, None, child.headers)
                child_objs, child_bytes = self._RecursePrint(child)

            byte_count += child_bytes
            object_count += child_objs

        if blr.HasPrefix() and not self.summary_only:
            label = blr.GetUriString().encode('utf-8')
            self._PrintSummaryLine(byte_count, label)

        return object_count, byte_count
 def testContainsWildcard(self):
     """Tests ContainsWildcard call"""
     self.assertTrue(ContainsWildcard('a*.txt'))
     self.assertTrue(ContainsWildcard('a[0-9].txt'))
     self.assertFalse(ContainsWildcard('0-9.txt'))
     self.assertTrue(ContainsWildcard('?.txt'))