コード例 #1
0
ファイル: name_expansion.py プロジェクト: isoundy000/v8-1
class _NameExpansionIterator(object):
    """Iterator that expands URI strings into the objects/files they name.

    Iterates over all uri_strs, expanding wildcards, object-less bucket names,
    subdir bucket names, and directory names, generating a flat listing of all
    the matching objects/files. Because __iter__ is a generator, each source
    URI string is only processed when iteration reaches it.

    You should instantiate this object using the static factory function
    NameExpansionIterator, because consumers of this iterator need the
    PluralityCheckableIterator wrapper built by that function.

    Yields:
      gslib.name_expansion.NameExpansionResult.

    Raises:
      CommandException: if a source URI string matches nothing.
    """

    def __init__(self,
                 command_name,
                 proj_id_handler,
                 headers,
                 debug,
                 bucket_storage_uri_class,
                 uri_strs,
                 recursion_requested,
                 have_existing_dst_container=None,
                 flat=True,
                 all_versions=False,
                 for_all_version_delete=False):
        """Stores the expansion settings; no expansion work happens here.

        Args:
          command_name: name of command being run.
          proj_id_handler: ProjectIdHandler to use for current command.
          headers: Dictionary containing optional HTTP headers to pass to
              boto.
          debug: Debug level to pass in to boto connection (range 0..3).
          bucket_storage_uri_class: Class to instantiate for cloud
              StorageUris. Settable for testing/mocking.
          uri_strs: PluralityCheckableIterator of URI strings needing
              expansion (has_plurality() is called on it while iterating).
          recursion_requested: True if -R specified on command-line.
          have_existing_dst_container: Bool indicator whether this is a copy
              request to an existing bucket, bucket subdir, or directory.
              Default None value should be used in cases where this is not
              needed (commands other than cp).
          flat: Bool indicating whether bucket listings should be flattened,
              i.e., so the mapped-to results contain objects spanning
              subdirectories.
          all_versions: Bool indicating whether to iterate over all object
              versions.
          for_all_version_delete: Bool indicating whether this is for an
              all-version delete. Kept on the instance; nothing in this class
              reads it.

        Examples of _NameExpansionIterator with flat=True:
          - Calling with one of the uri_strs being 'gs://bucket' will
            enumerate all top-level objects, as will 'gs://bucket/' and
            'gs://bucket/*'.
          - 'gs://bucket/**' will enumerate all objects in the bucket.
          - 'gs://bucket/abc' will enumerate all next-level objects under
            directory abc (i.e., not including subdirectories of abc) if
            gs://bucket/abc/* matches any objects; otherwise it will
            enumerate the single name gs://bucket/abc
          - 'gs://bucket/abc/**' will enumerate all objects under abc or any
            of its subdirectories.
          - 'file:///tmp' will enumerate all files under /tmp, as will
            'file:///tmp/*'
          - 'file:///tmp/**' will enumerate all files under /tmp or any of
            its subdirectories.

        Example if flat=False: calling with gs://bucket/abc/* lists matching
        objects or subdirs, but not sub-subdirs or objects beneath subdirs.

        Note: In step-by-step comments in __iter__ we give examples assuming
        there's a gs://bucket with object paths:
          abcd/o1.txt
          abcd/o2.txt
          xyz/o1.txt
          xyz/o2.txt
        and a directory file://dir with file paths:
          dir/a.txt
          dir/b.txt
          dir/c/
        """
        self.command_name = command_name
        self.proj_id_handler = proj_id_handler
        self.headers = headers
        self.debug = debug
        self.bucket_storage_uri_class = bucket_storage_uri_class
        self.suri_builder = StorageUriBuilder(debug, bucket_storage_uri_class)
        self.uri_strs = uri_strs
        self.recursion_requested = recursion_requested
        self.have_existing_dst_container = have_existing_dst_container
        self.flat = flat
        self.all_versions = all_versions
        # Previously this argument was accepted and then dropped; retain it so
        # the caller's choice is at least visible on the instance.
        self.for_all_version_delete = for_all_version_delete

        # Map holding wildcard strings to use for flat vs subdir-by-subdir
        # listings. (A flat listing means show all objects expanded all the
        # way down.)
        self._flatness_wildcard = {True: '**', False: '*'}

    def __iter__(self):
        """Yields a NameExpansionResult per object/file the URIs expand to.

        Raises:
          CommandException: if a URI string expands to nothing.
        """
        for uri_str in self.uri_strs:
            # Step 1: Expand any explicitly specified wildcards. The output
            # from this step is an iterator of BucketListingRef.
            # Starting with gs://buck*/abc* this step would expand to
            # gs://bucket/abcd
            if ContainsWildcard(uri_str):
                post_step1_iter = self._WildcardIterator(uri_str)
            else:
                suri = self.suri_builder.StorageUri(uri_str)
                post_step1_iter = iter([BucketListingRef(suri)])
            post_step1_iter = PluralityCheckableIterator(post_step1_iter)

            # Step 2: Expand bucket subdirs and versions. The output from this
            # step is an iterator of (names_container, BucketListingRef).
            # Starting with gs://bucket/abcd this step would expand to:
            #   iter([(True, abcd/o1.txt), (True, abcd/o2.txt)]).
            if self.flat and self.recursion_requested:
                post_step2_iter = _ImplicitBucketSubdirIterator(
                    self, post_step1_iter, self.flat)
            elif self.all_versions:
                post_step2_iter = _AllVersionIterator(self,
                                                      post_step1_iter,
                                                      headers=self.headers)
            else:
                post_step2_iter = _NonContainerTuplifyIterator(post_step1_iter)
            post_step2_iter = PluralityCheckableIterator(post_step2_iter)

            # Step 3. Expand directories and buckets. This step yields the
            # iterated values. Starting with gs://bucket this step would
            # expand to:
            #  [abcd/o1.txt, abcd/o2.txt, xyz/o1.txt, xyz/o2.txt]
            # Starting with file://dir this step would expand to:
            #  [dir/a.txt, dir/b.txt, dir/c/]
            wc = self._flatness_wildcard[self.flat]
            src_uri_expands_to_multi = (post_step1_iter.has_plurality()
                                        or post_step2_iter.has_plurality())
            is_multi_src_request = (self.uri_strs.has_plurality()
                                    or src_uri_expands_to_multi)

            if post_step2_iter.is_empty():
                raise CommandException('No URIs matched: %s' % uri_str)
            for (names_container, blr) in post_step2_iter:
                if (not blr.GetUri().names_container()
                        and (self.flat or not blr.HasPrefix())):
                    # Not a container: emit it as-is.
                    yield NameExpansionResult(uri_str,
                                              is_multi_src_request,
                                              src_uri_expands_to_multi,
                                              names_container,
                                              blr.GetUriString(),
                                              self.have_existing_dst_container,
                                              is_latest=blr.IsLatest())
                    continue
                if not self.recursion_requested:
                    # Containers are skipped (with a hint) unless -R was given.
                    if blr.GetUri().is_file_uri():
                        desc = 'directory'
                    else:
                        desc = 'bucket'
                    # A single parenthesised argument prints the same text
                    # whether or not print is a statement.
                    print('Omitting %s "%s". (Did you mean to do %s -R?)' % (
                        desc, blr.GetUri(), self.command_name))
                    continue
                if blr.GetUri().is_file_uri():
                    # Convert dir to implicit recursive wildcard.
                    uri_to_iterate = '%s/%s' % (blr.GetUriString(), wc)
                else:
                    # Convert bucket to implicit recursive wildcard.
                    uri_to_iterate = blr.GetUri().clone_replace_name(wc)
                wc_iter = PluralityCheckableIterator(
                    self._WildcardIterator(uri_to_iterate))
                # Expanding a container can turn a single match into several,
                # so refresh both flags before yielding its contents.
                src_uri_expands_to_multi = (src_uri_expands_to_multi
                                            or wc_iter.has_plurality())
                is_multi_src_request = (self.uri_strs.has_plurality()
                                        or src_uri_expands_to_multi)
                for expanded_blr in wc_iter:
                    # Results of a container expansion are always reported
                    # with names_container=True.
                    yield NameExpansionResult(uri_str,
                                              is_multi_src_request,
                                              src_uri_expands_to_multi,
                                              True,
                                              expanded_blr.GetUriString(),
                                              self.have_existing_dst_container,
                                              is_latest=expanded_blr.IsLatest())

    def _WildcardIterator(self, uri_or_str):
        """Builds a wildcard iterator configured from this instance's state.

        Helper to instantiate gslib.WildcardIterator. Args are same as
        gslib.WildcardIterator interface, but this method fills in most of the
        values from instance state.

        Args:
          uri_or_str: StorageUri or URI string naming wildcard objects to
              iterate.

        Returns:
          Whatever wildcard_iterator.wildcard_iterator returns for the given
          URI, project id handler, URI class, headers, debug level and
          all_versions setting.
        """
        return wildcard_iterator.wildcard_iterator(
            uri_or_str,
            self.proj_id_handler,
            bucket_storage_uri_class=self.bucket_storage_uri_class,
            headers=self.headers,
            debug=self.debug,
            all_versions=self.all_versions)
コード例 #2
0
    def _MaybeCheckForAndOfferSoftwareUpdate(self, command_name, debug):
        """Offers the user a gsutil update when the periodic check is due.

        Checks the last time we checked for an update, and if it's been longer
        than the configured threshold offers the user to update gsutil.

        Args:
          command_name: The name of the command being run.
          debug: Debug level to pass in to boto connection (range 0..3).

        Returns:
          True if the user decides to update, False in every other case
          (check skipped, not yet due, versions equal, package install, or
          the user declines).
        """
        # Don't try to interact with user if:
        # - gsutil is not connected to a tty (e.g., if being run from cron);
        # - user is running gsutil -q
        # - user is running the update command (which could otherwise cause an
        #   additional note that an update is available when user is already
        #   trying to perform an update);
        # - user doesn't have credentials configured; or,
        # - user specified gs_host (which could be a non-production different
        #   service instance, in which case credentials won't work for checking
        #   gsutil tarball).
        gs_host = boto.config.get('Credentials', 'gs_host', None)
        if (not sys.stdout.isatty() or not sys.stderr.isatty()
                or not sys.stdin.isatty() or command_name == 'update'
                or not logging.getLogger().isEnabledFor(logging.INFO)
                or not HasConfiguredCredentials() or gs_host):
            return False

        # Use getint rather than get: get hands back the configured value as a
        # string, so the "== 0" test below would never match and the
        # multiplication by SECONDS_PER_DAY would repeat the string instead of
        # computing a number of seconds.
        software_update_check_period = boto.config.getint(
            'GSUtil', 'software_update_check_period', 30)
        # Setting software_update_check_period to 0 means periodic software
        # update checking is disabled.
        if software_update_check_period == 0:
            return False

        cur_ts = int(time.time())
        if not os.path.isfile(LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE):
            # Set last_checked_ts from date of VERSION file, so if the user
            # installed an old copy of gsutil it will get noticed (and an
            # update offered) the first time they try to run it.
            last_checked_ts = int(os.path.getmtime(gslib.VERSION_FILE))
            with open(LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE, 'w') as f:
                f.write(str(last_checked_ts))
        else:
            try:
                with open(LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE,
                          'r') as f:
                    last_checked_ts = int(f.readline())
            except (TypeError, ValueError):
                # An empty or corrupt timestamp file must not abort the
                # command the user actually asked for; skip the check.
                return False

        if (cur_ts - last_checked_ts >
                software_update_check_period * SECONDS_PER_DAY):
            suri_builder = StorageUriBuilder(debug,
                                             self.bucket_storage_uri_class)
            cur_ver = LookUpGsutilVersion(
                suri_builder.StorageUri(GSUTIL_PUB_TARBALL))
            # Record the check time before prompting, so the period restarts
            # whatever the user answers.
            with open(LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE, 'w') as f:
                f.write(str(cur_ts))
            # NOTE(review): any difference triggers the offer, including the
            # case where the running version is newer than the published one.
            if gslib.VERSION != cur_ver:
                # A single parenthesised argument prints the same text whether
                # or not print is a statement.
                print('\n'.join(
                    textwrap.wrap(
                        'A newer version of gsutil (%s) is available than the version you '
                        'are running (%s). A detailed log of gsutil release changes is '
                        'available at gs://pub/gsutil_ReleaseNotes.txt if you would like '
                        'to read them before updating.' %
                        (cur_ver, gslib.VERSION),
                        width=78)))
                if gslib.IS_PACKAGE_INSTALL:
                    return False
                # Blank line between the notice and the prompt.
                print('')
                answer = raw_input('Would you like to update [Y/n]? ')
                # Default is yes: only an answer starting with 'n'/'N'
                # declines.
                return not answer or answer.lower()[0] != 'n'
        return False
コード例 #3
0
class NameExpansionHandler(object):
    """Expands URI strings into listings of the objects/files they name."""

    def __init__(self, command_name, proj_id_handler, headers, debug,
                 bucket_storage_uri_class):
        """Records the per-command settings used by the expansion helpers.

        Args:
          command_name: name of command being run.
          proj_id_handler: ProjectIdHandler to use for current command.
          headers: Dictionary containing optional HTTP headers to pass to
              boto.
          debug: Debug level to pass in to boto connection (range 0..3).
          bucket_storage_uri_class: Class to instantiate for cloud
              StorageUris. Settable for testing/mocking.
        """
        self.command_name = command_name
        self.proj_id_handler = proj_id_handler
        self.headers = headers
        self.debug = debug
        self.bucket_storage_uri_class = bucket_storage_uri_class
        self.suri_builder = StorageUriBuilder(debug, bucket_storage_uri_class)

        # Wildcard to append when listing a container: '**' for a flat
        # listing (all objects expanded all the way down), '*' for a
        # subdir-by-subdir listing.
        self._flatness_wildcard = {True: '**', False: '*'}

    def WildcardIterator(self, uri_or_str):
        """Builds a wildcard iterator configured from this instance's state.

        Helper to instantiate gslib.WildcardIterator; everything except the
        URI comes from attributes set in __init__.

        Args:
          uri_or_str: StorageUri or URI string naming wildcard objects to
              iterate.
        """
        return wildcard_iterator.wildcard_iterator(
            uri_or_str,
            self.proj_id_handler,
            bucket_storage_uri_class=self.bucket_storage_uri_class,
            headers=self.headers,
            debug=self.debug)

    def ExpandWildcardsAndContainers(self,
                                     uri_strs,
                                     recursion_requested,
                                     flat=True):
        """Expands each URI string to the objects/files it names.

        Expands wildcards, object-less bucket names, subdir bucket names, and
        directory names, producing a flat listing of all the matching
        objects/files.

        Args:
          uri_strs: List of URI strings needing expansion.
          recursion_requested: True if -R specified on command-line.
          flat: Bool indicating whether bucket listings should be flattened,
              i.e., so the mapped-to results contain objects spanning
              subdirectories.

        Returns:
          gslib.name_expansion.NameExpansionResult holding one expansion per
          entry of uri_strs.

        Raises:
          CommandException: if errors encountered.

        Examples with flat=True:
          - 'gs://bucket', 'gs://bucket/' and 'gs://bucket/*' each enumerate
            all top-level objects.
          - 'gs://bucket/**' enumerates all objects in the bucket.
          - 'gs://bucket/abc' enumerates all next-level objects under
            directory abc (i.e., not including subdirectories of abc) if
            gs://bucket/abc/* matches any objects; otherwise it enumerates
            the single name gs://bucket/abc
          - 'gs://bucket/abc/**' enumerates all objects under abc or any of
            its subdirectories.
          - 'file:///tmp' and 'file:///tmp/*' each enumerate all files under
            /tmp.
          - 'file:///tmp/**' enumerates all files under /tmp or any of its
            subdirectories.

        Example if flat=False: calling with gs://bucket/abc/* lists matching
        objects or subdirs, but not sub-subdirs or objects beneath subdirs.

        Note: The step comments below assume a gs://bucket with object paths:
          abcd/o1.txt
          abcd/o2.txt
          xyz/o1.txt
          xyz/o2.txt
        and a directory file://dir with file paths:
          dir/a.txt
          dir/b.txt
          dir/c/
        """
        expansion = NameExpansionResult()
        for uri_str in uri_strs:

            # Step 1: resolve explicit wildcards.
            # gs://buck*/abc* becomes gs://bucket/abcd
            if ContainsWildcard(uri_str):
                step1_refs = list(self.WildcardIterator(uri_str))
            else:
                step1_refs = [
                    BucketListingRef(self.suri_builder.StorageUri(uri_str))
                ]

            # Step 2: resolve implicit bucket subdirs (flat + -R only).
            # gs://bucket/abcd becomes [abcd/o1.txt, abcd/o2.txt].
            names_container = False
            if flat and recursion_requested:
                step2_refs = []
                for candidate in step1_refs:
                    # The flag of the last candidate processed is the one
                    # that survives the loop.
                    names_container, subdir_refs = (
                        self._DoImplicitBucketSubdirExpansionIfApplicable(
                            candidate.GetUri(), flat))
                    step2_refs.extend(subdir_refs)
            else:
                step2_refs = step1_refs

            # Step 3: resolve directories and buckets.
            # gs://bucket becomes
            #  [abcd/o1.txt, abcd/o2.txt, xyz/o1.txt, xyz/o2.txt]
            # file://dir becomes
            #  [dir/a.txt, dir/b.txt, dir/c/]
            expanded_refs = []
            listing_wildcard = self._flatness_wildcard[flat]
            for ref in step2_refs:
                is_container = ref.GetUri().names_container()
                if not is_container and (flat or not ref.HasPrefix()):
                    # Plain object/file: keep it unchanged.
                    expanded_refs.append(ref)
                    continue
                if not recursion_requested:
                    # Containers are skipped (with a hint) unless -R was
                    # given.
                    desc = 'directory' if ref.GetUri().is_file_uri() else 'bucket'
                    print('Omitting %s "%s". (Did you mean to do %s -R?)' % (
                        desc, ref.GetUri(), self.command_name))
                    continue
                names_container = True
                if ref.GetUri().is_file_uri():
                    # Directory: list it through an implicit wildcard.
                    uri_to_iter = '%s/%s' % (ref.GetUriString(),
                                             listing_wildcard)
                else:
                    # Bucket: list it through an implicit wildcard.
                    uri_to_iter = ref.GetUri().clone_replace_name(
                        listing_wildcard)
                expanded_refs.extend(self.WildcardIterator(uri_to_iter))

            expansion._AddExpansion(self.suri_builder.StorageUri(uri_str),
                                    names_container,
                                    expanded_refs)

        return expansion

    def _DoImplicitBucketSubdirExpansionIfApplicable(self, uri, flat):
        """Expands uri if it turns out to be an implicit bucket subdir.

        Checks whether uri could be an implicit bucket subdir, and expands if
        so; else returns list containing uri. For example gs://abc would be an
        implicit bucket subdir if the -R option was specified and gs://abc/*
        matches anything.
        Can only be called for -R (recursion requested).

        Args:
          uri: StorageUri.
          flat: bool indicating whether bucket listings should be flattened,
              i.e., so the mapped-to results contain objects spanning
              subdirectories.

        Returns:
          tuple (names_container, [BucketListingRefs to which uri expanded])
            where names_container is true if URI names a directory, bucket,
            or bucket subdir (vs how StorageUri.names_container() doesn't
            handle latter case).
        """
        if not uri.names_object():
            return (False, [BucketListingRef(uri)])

        # URI names an object, so it could really be a bucket subdir: probe
        # for anything beneath it.
        subdir_wildcard = '%s/%s' % (uri.uri.rstrip('/'),
                                     self._flatness_wildcard[flat])
        subdir_matches = list(
            self.WildcardIterator(
                self.suri_builder.StorageUri(subdir_wildcard)))
        if subdir_matches:
            return (True, subdir_matches)
        return (False, [BucketListingRef(uri)])

    def StorageUri(self, uri_str):
        """Instantiates a boto.StorageUri using this handler's settings.

        Helper to instantiate boto.StorageUri with gsutil default flag values.
        Uses self.bucket_storage_uri_class to support mocking/testing.
        (Identical to the same-named function in command.py; that and this
        copy make it convenient to call StorageUri() with a single argument,
        from the respective classes.)

        Args:
          uri_str: StorageUri naming bucket + optional object.

        Returns:
          boto.StorageUri for given uri_str.

        Raises:
          InvalidUriError: if uri_str not valid.
        """
        return gslib.util.StorageUri(uri_str, self.bucket_storage_uri_class,
                                     self.debug)
コード例 #4
0
    def _MaybeCheckForAndOfferSoftwareUpdate(self, command_name, debug):
        """Offers the user a gsutil update when the periodic check is due.

        Checks the last time we checked for an update, and if it's been longer
        than the configured threshold offers the user to update gsutil.

        Args:
          command_name: The name of the command being run.
          debug: Debug level to pass in to boto connection (range 0..3).

        Returns:
          True if the user decides to update, False in every other case
          (check skipped, not yet due, nothing newer, package install, or the
          user declines).
        """
        # Don't try to interact with user if:
        # - gsutil is not connected to a tty (e.g., if being run from cron);
        # - user is running gsutil -q
        # - user is running the config command (which could otherwise attempt
        #   to check for an update for a user running behind a proxy, who has
        #   not yet configured gsutil to go through the proxy; for such users
        #   we need the first connection attempt to be made by the gsutil
        #   config command).
        # - user is running the version command (which gets run when using
        #   gsutil -D, which would prevent users with proxy config problems
        #   from sending us gsutil -D output).
        # - user is running the update command (which could otherwise cause an
        #   additional note that an update is available when user is already
        #   trying to perform an update);
        # - user specified gs_host (which could be a non-production different
        #   service instance, in which case credentials won't work for checking
        #   gsutil tarball).
        gs_host = boto.config.get('Credentials', 'gs_host', None)
        if (not IsRunningInteractively()
                or command_name in ('config', 'update', 'ver', 'version')
                or not logging.getLogger().isEnabledFor(logging.INFO)
                or gs_host):
            return False

        software_update_check_period = boto.config.getint(
            'GSUtil', 'software_update_check_period', 30)
        # Setting software_update_check_period to 0 means periodic software
        # update checking is disabled.
        if software_update_check_period == 0:
            return False

        cur_ts = int(time.time())
        if not os.path.isfile(LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE):
            # Set last_checked_ts from date of VERSION file, so if the user
            # installed an old copy of gsutil it will get noticed (and an
            # update offered) the first time they try to run it.
            last_checked_ts = GetGsutilVersionModifiedTime()
            with open(LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE, 'w') as f:
                f.write(str(last_checked_ts))
        else:
            try:
                with open(LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE,
                          'r') as f:
                    last_checked_ts = int(f.readline())
            except (TypeError, ValueError):
                # Unreadable timestamp: skip the check rather than fail the
                # command the user asked for.
                return False

        if (cur_ts - last_checked_ts >
                software_update_check_period * SECONDS_PER_DAY):
            suri_builder = StorageUriBuilder(debug,
                                             self.bucket_storage_uri_class)
            cur_ver = LookUpGsutilVersion(
                suri_builder.StorageUri(GSUTIL_PUB_TARBALL))
            # Record the check time before prompting, so the period restarts
            # whatever the user answers.
            with open(LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE, 'w') as f:
                f.write(str(cur_ts))
            # m selects the "major new version" notice, g the regular "newer
            # version" notice (presumably greater / major flags -- confirm
            # against CompareVersions).
            (g, m) = CompareVersions(cur_ver, gslib.VERSION)
            if m:
                # A single parenthesised argument prints the same text whether
                # or not print is a statement.
                print('\n'.join(
                    textwrap.wrap(
                        'A newer version of gsutil (%s) is available than the version you '
                        'are running (%s). NOTE: This is a major new version, so it is '
                        'strongly recommended that you review the release note details at %s '
                        'before updating to this version, especially if you use gsutil in '
                        'scripts.' %
                        (cur_ver, gslib.VERSION, RELEASE_NOTES_URL))))
                if gslib.IS_PACKAGE_INSTALL:
                    return False
                # Blank line between the notice and the prompt.
                print('')
                answer = raw_input('Would you like to update [y/N]? ')
                # Default is no: only an answer starting with 'y'/'Y' accepts.
                # bool() keeps the result a real boolean; a bare
                # "answer and ..." would hand back '' for an empty answer.
                return bool(answer) and answer.lower()[0] == 'y'
            elif g:
                print('\n'.join(
                    textwrap.wrap(
                        'A newer version of gsutil (%s) is available than the version you '
                        'are running (%s). A detailed log of gsutil release changes is '
                        'available at %s if you would like to read them before updating.'
                        % (cur_ver, gslib.VERSION, RELEASE_NOTES_URL))))
                if gslib.IS_PACKAGE_INSTALL:
                    return False
                # Blank line between the notice and the prompt.
                print('')
                answer = raw_input('Would you like to update [Y/n]? ')
                # Default is yes: only an answer starting with 'n'/'N'
                # declines.
                return not answer or answer.lower()[0] != 'n'
        return False