Code Example #1
    def __init__(self, command_name, proj_id_handler, headers, debug,
                 bucket_storage_uri_class):
        """
    Args:
      command_name: name of command being run.
      proj_id_handler: ProjectIdHandler to use for current command.
      headers: Dictionary containing optional HTTP headers to pass to boto.
      debug: Debug level to pass in to boto connection (range 0..3).
      bucket_storage_uri_class: Class to instantiate for cloud StorageUris.
          Settable for testing/mocking.
    """
        self.command_name = command_name
        self.proj_id_handler = proj_id_handler
        self.headers = headers
        self.debug = debug
        self.bucket_storage_uri_class = bucket_storage_uri_class
        self.suri_builder = StorageUriBuilder(debug, bucket_storage_uri_class)

        # Map holding wildcard strings to use for flat vs subdir-by-subdir listings.
        # (A flat listing means show all objects expanded all the way down.)
        self._flatness_wildcard = {True: '**', False: '*'}
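The _flatness_wildcard map above is the whole mechanism for choosing between a fully recursive listing and a one-level listing. A minimal, self-contained sketch (illustrative only, not gsutil code) of how the flag picks the wildcard that later gets appended to a container name:

    # Hypothetical illustration: flat listings recurse with '**', while
    # subdir-by-subdir listings stop after one level with '*'.
    flatness_wildcard = {True: '**', False: '*'}

    def listing_pattern(prefix, flat):
        # e.g. ('gs://bucket/abc', True) -> 'gs://bucket/abc/**'
        return '%s/%s' % (prefix.rstrip('/'), flatness_wildcard[flat])

    assert listing_pattern('gs://bucket/abc/', True) == 'gs://bucket/abc/**'
    assert listing_pattern('gs://bucket/abc', False) == 'gs://bucket/abc/*'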
Code Example #2
  def test_filter_existing_components_versioned(self):
    bucket_name = 'filter_existing_components_bucket_versioned'
    bucket_uri = self.CreateVersionedBucket(bucket_name=bucket_name)

    # Already uploaded, contents still match, component still used.
    fpath_uploaded_correctly = self.CreateTempFile(file_name='foo1',
                                                   contents='1')
    key_uploaded_correctly = self.CreateObject(object_name='foo1', contents='1',
                                               bucket_uri=bucket_uri)
    args_uploaded_correctly = PerformResumableUploadIfAppliesArgs(
        fpath_uploaded_correctly, 0, 1, fpath_uploaded_correctly,
        key_uploaded_correctly, key_uploaded_correctly.generation, {})

    # Already uploaded, but contents no longer match.
    fpath_wrong_contents = self.CreateTempFile(file_name='foo4', contents='4')
    key_wrong_contents = self.CreateObject(object_name='foo4', contents='_',
                                           bucket_uri=bucket_uri)

    args_wrong_contents = PerformResumableUploadIfAppliesArgs(
        fpath_wrong_contents, 0, 1, fpath_wrong_contents, key_wrong_contents,
        key_wrong_contents.generation, {})

    dst_args = {fpath_uploaded_correctly: args_uploaded_correctly,
                fpath_wrong_contents: args_wrong_contents}

    existing_components = [ObjectFromTracker(fpath_uploaded_correctly,
                                             key_uploaded_correctly.generation),
                           ObjectFromTracker(fpath_wrong_contents,
                                             key_wrong_contents.generation)]

    suri_builder = StorageUriBuilder(0, BucketStorageUri)

    (components_to_upload, uploaded_components, existing_objects_to_delete) = (
        FilterExistingComponents(dst_args, existing_components,
                                 bucket_uri.bucket_name, suri_builder))

    self.assertEqual([args_wrong_contents], components_to_upload)
    self.assertEqual(str([args_uploaded_correctly.dst_uri]),
                     str(uploaded_components))
    expected_to_delete = [(args_wrong_contents.dst_uri.object_name,
                           args_wrong_contents.dst_uri.generation)]
    for uri in existing_objects_to_delete:
      self.assertTrue((uri.object_name, uri.generation) in expected_to_delete)
    self.assertEqual(len(expected_to_delete), len(existing_objects_to_delete))
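This test exercises FilterExistingComponents on a versioned bucket. A hypothetical, simplified model (not gsutil code) of the three-way partition the assertions check: a tracked component is reused only if it still exists and its contents match; otherwise it is re-uploaded, and in a versioned bucket the stale generation is queued for deletion.

    def partition_components(tracked, exists, matches, versioned):
        # Returns (to_upload, reused, to_delete), mirroring the tuple the
        # test unpacks from FilterExistingComponents.
        to_upload, reused, to_delete = [], [], []
        for name in tracked:
            if exists(name) and matches(name):
                reused.append(name)
            else:
                to_upload.append(name)
                if versioned and exists(name):
                    to_delete.append(name)
        return to_upload, reused, to_delete

    # Mirrors the versioned test above: 'foo1' still matches, 'foo4' does not.
    to_up, reused, to_del = partition_components(
        ['foo1', 'foo4'],
        exists=lambda n: True,
        matches=lambda n: n == 'foo1',
        versioned=True)
    assert to_up == ['foo4'] and reused == ['foo1'] and to_del == ['foo4']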
Code Example #3
File: name_expansion.py Project: isoundy000/v8-1
    def __init__(self,
                 command_name,
                 proj_id_handler,
                 headers,
                 debug,
                 bucket_storage_uri_class,
                 uri_strs,
                 recursion_requested,
                 have_existing_dst_container=None,
                 flat=True,
                 all_versions=False,
                 for_all_version_delete=False):
        """
    Args:
      command_name: name of command being run.
      proj_id_handler: ProjectIdHandler to use for current command.
      headers: Dictionary containing optional HTTP headers to pass to boto.
      debug: Debug level to pass in to boto connection (range 0..3).
      bucket_storage_uri_class: Class to instantiate for cloud StorageUris.
          Settable for testing/mocking.
      uri_strs: PluralityCheckableIterator of URI strings needing expansion.
      recursion_requested: True if -R specified on command-line.
      have_existing_dst_container: Bool indicating whether this is a copy
          request to an existing bucket, bucket subdir, or directory. Default
          None value should be used in cases where this is not needed (commands
          other than cp).
      flat: Bool indicating whether bucket listings should be flattened, i.e.,
          so the mapped-to results contain objects spanning subdirectories.
      all_versions: Bool indicating whether to iterate over all object versions.
      for_all_version_delete: Bool indicating whether this is for an all-version
          delete.

    Examples of _NameExpansionIterator with flat=True:
      - Calling with one of the uri_strs being 'gs://bucket' will enumerate all
        top-level objects, as will 'gs://bucket/' and 'gs://bucket/*'.
      - 'gs://bucket/**' will enumerate all objects in the bucket.
      - 'gs://bucket/abc' will enumerate all next-level objects under directory
        abc (i.e., not including subdirectories of abc) if gs://bucket/abc/*
        matches any objects; otherwise it will enumerate the single name
        gs://bucket/abc.
      - 'gs://bucket/abc/**' will enumerate all objects under abc or any of its
        subdirectories.
      - 'file:///tmp' will enumerate all files under /tmp, as will
        'file:///tmp/*'.
      - 'file:///tmp/**' will enumerate all files under /tmp or any of its
        subdirectories.

    Example if flat=False: calling with gs://bucket/abc/* lists matching objects
    or subdirs, but not sub-subdirs or objects beneath subdirs.

    Note: In step-by-step comments below we give examples assuming there's a
    gs://bucket with object paths:
      abcd/o1.txt
      abcd/o2.txt
      xyz/o1.txt
      xyz/o2.txt
    and a directory file://dir with file paths:
      dir/a.txt
      dir/b.txt
      dir/c/
    """
        self.command_name = command_name
        self.proj_id_handler = proj_id_handler
        self.headers = headers
        self.debug = debug
        self.bucket_storage_uri_class = bucket_storage_uri_class
        self.suri_builder = StorageUriBuilder(debug, bucket_storage_uri_class)
        self.uri_strs = uri_strs
        self.recursion_requested = recursion_requested
        self.have_existing_dst_container = have_existing_dst_container
        self.flat = flat
        self.all_versions = all_versions

        # Map holding wildcard strings to use for flat vs subdir-by-subdir listings.
        # (A flat listing means show all objects expanded all the way down.)
        self._flatness_wildcard = {True: '**', False: '*'}
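The docstring's '*' vs '**' examples can be modeled with a few lines of standalone Python (illustrative only; the real matching lives in gslib.wildcard_iterator): '**' spans '/' separators while '*' stays within a single path segment.

    import re

    def wildcard_match(pattern, path):
        # Translate: '**' -> match anything; '*' -> match within one segment.
        regex = re.escape(pattern).replace(r'\*\*', '.*').replace(r'\*', '[^/]*')
        return re.match(regex + r'\Z', path) is not None

    assert wildcard_match('**', 'abcd/o1.txt')        # flat: whole bucket
    assert not wildcard_match('*', 'abcd/o1.txt')     # top level only
    assert wildcard_match('abcd/*', 'abcd/o1.txt')
    assert not wildcard_match('abcd/*', 'abcd/sub/o.txt')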
Code Example #4
File: name_expansion.py Project: isoundy000/v8-1
class _NameExpansionIterator(object):
    """
  Iterates over all src_uris, expanding wildcards, object-less bucket names,
  subdir bucket names, and directory names, generating a flat listing of all
  the matching objects/files.

  You should instantiate this object using the static factory function
  NameExpansionIterator, because consumers of this iterator need the
  PluralityCheckableIterator wrapper built by that function.

  Yields:
    gslib.name_expansion.NameExpansionResult.

  Raises:
    CommandException: if errors encountered.
  """
    def __init__(self,
                 command_name,
                 proj_id_handler,
                 headers,
                 debug,
                 bucket_storage_uri_class,
                 uri_strs,
                 recursion_requested,
                 have_existing_dst_container=None,
                 flat=True,
                 all_versions=False,
                 for_all_version_delete=False):
        """
    Args:
      command_name: name of command being run.
      proj_id_handler: ProjectIdHandler to use for current command.
      headers: Dictionary containing optional HTTP headers to pass to boto.
      debug: Debug level to pass in to boto connection (range 0..3).
      bucket_storage_uri_class: Class to instantiate for cloud StorageUris.
          Settable for testing/mocking.
      uri_strs: PluralityCheckableIterator of URI strings needing expansion.
      recursion_requested: True if -R specified on command-line.
      have_existing_dst_container: Bool indicating whether this is a copy
          request to an existing bucket, bucket subdir, or directory. Default
          None value should be used in cases where this is not needed (commands
          other than cp).
      flat: Bool indicating whether bucket listings should be flattened, i.e.,
          so the mapped-to results contain objects spanning subdirectories.
      all_versions: Bool indicating whether to iterate over all object versions.
      for_all_version_delete: Bool indicating whether this is for an all-version
          delete.

    Examples of _NameExpansionIterator with flat=True:
      - Calling with one of the uri_strs being 'gs://bucket' will enumerate all
        top-level objects, as will 'gs://bucket/' and 'gs://bucket/*'.
      - 'gs://bucket/**' will enumerate all objects in the bucket.
      - 'gs://bucket/abc' will enumerate all next-level objects under directory
        abc (i.e., not including subdirectories of abc) if gs://bucket/abc/*
        matches any objects; otherwise it will enumerate the single name
        gs://bucket/abc.
      - 'gs://bucket/abc/**' will enumerate all objects under abc or any of its
        subdirectories.
      - 'file:///tmp' will enumerate all files under /tmp, as will
        'file:///tmp/*'.
      - 'file:///tmp/**' will enumerate all files under /tmp or any of its
        subdirectories.

    Example if flat=False: calling with gs://bucket/abc/* lists matching objects
    or subdirs, but not sub-subdirs or objects beneath subdirs.

    Note: In step-by-step comments below we give examples assuming there's a
    gs://bucket with object paths:
      abcd/o1.txt
      abcd/o2.txt
      xyz/o1.txt
      xyz/o2.txt
    and a directory file://dir with file paths:
      dir/a.txt
      dir/b.txt
      dir/c/
    """
        self.command_name = command_name
        self.proj_id_handler = proj_id_handler
        self.headers = headers
        self.debug = debug
        self.bucket_storage_uri_class = bucket_storage_uri_class
        self.suri_builder = StorageUriBuilder(debug, bucket_storage_uri_class)
        self.uri_strs = uri_strs
        self.recursion_requested = recursion_requested
        self.have_existing_dst_container = have_existing_dst_container
        self.flat = flat
        self.all_versions = all_versions

        # Map holding wildcard strings to use for flat vs subdir-by-subdir listings.
        # (A flat listing means show all objects expanded all the way down.)
        self._flatness_wildcard = {True: '**', False: '*'}

    def __iter__(self):
        for uri_str in self.uri_strs:
            # Step 1: Expand any explicitly specified wildcards. The output from this
            # step is an iterator of BucketListingRef.
            # Starting with gs://buck*/abc* this step would expand to gs://bucket/abcd
            if ContainsWildcard(uri_str):
                post_step1_iter = self._WildcardIterator(uri_str)
            else:
                suri = self.suri_builder.StorageUri(uri_str)
                post_step1_iter = iter([BucketListingRef(suri)])
            post_step1_iter = PluralityCheckableIterator(post_step1_iter)

            # Step 2: Expand bucket subdirs and versions. The output from this
            # step is an iterator of (names_container, BucketListingRef).
            # Starting with gs://bucket/abcd this step would expand to:
            #   iter([(True, abcd/o1.txt), (True, abcd/o2.txt)]).
            if self.flat and self.recursion_requested:
                post_step2_iter = _ImplicitBucketSubdirIterator(
                    self, post_step1_iter, self.flat)
            elif self.all_versions:
                post_step2_iter = _AllVersionIterator(self,
                                                      post_step1_iter,
                                                      headers=self.headers)
            else:
                post_step2_iter = _NonContainerTuplifyIterator(post_step1_iter)
            post_step2_iter = PluralityCheckableIterator(post_step2_iter)

            # Step 3. Expand directories and buckets. This step yields the iterated
            # values. Starting with gs://bucket this step would expand to:
            #  [abcd/o1.txt, abcd/o2.txt, xyz/o1.txt, xyz/o2.txt]
            # Starting with file://dir this step would expand to:
            #  [dir/a.txt, dir/b.txt, dir/c/]
            exp_src_bucket_listing_refs = []
            wc = self._flatness_wildcard[self.flat]
            src_uri_expands_to_multi = (post_step1_iter.has_plurality()
                                        or post_step2_iter.has_plurality())
            is_multi_src_request = (self.uri_strs.has_plurality()
                                    or src_uri_expands_to_multi)

            if post_step2_iter.is_empty():
                raise CommandException('No URIs matched: %s' % uri_str)
            for (names_container, blr) in post_step2_iter:
                if (not blr.GetUri().names_container()
                        and (self.flat or not blr.HasPrefix())):
                    yield NameExpansionResult(uri_str,
                                              is_multi_src_request,
                                              src_uri_expands_to_multi,
                                              names_container,
                                              blr.GetUriString(),
                                              self.have_existing_dst_container,
                                              is_latest=blr.IsLatest())
                    continue
                if not self.recursion_requested:
                    if blr.GetUri().is_file_uri():
                        desc = 'directory'
                    else:
                        desc = 'bucket'
                    print 'Omitting %s "%s". (Did you mean to do %s -R?)' % (
                        desc, blr.GetUri(), self.command_name)
                    continue
                if blr.GetUri().is_file_uri():
                    # Convert dir to implicit recursive wildcard.
                    uri_to_iterate = '%s/%s' % (blr.GetUriString(), wc)
                else:
                    # Convert bucket to implicit recursive wildcard.
                    uri_to_iterate = blr.GetUri().clone_replace_name(wc)
                wc_iter = PluralityCheckableIterator(
                    self._WildcardIterator(uri_to_iterate))
                src_uri_expands_to_multi = (src_uri_expands_to_multi
                                            or wc_iter.has_plurality())
                is_multi_src_request = (self.uri_strs.has_plurality()
                                        or src_uri_expands_to_multi)
                for blr in wc_iter:
                    yield NameExpansionResult(uri_str,
                                              is_multi_src_request,
                                              src_uri_expands_to_multi,
                                              True,
                                              blr.GetUriString(),
                                              self.have_existing_dst_container,
                                              is_latest=blr.IsLatest())

    def _WildcardIterator(self, uri_or_str):
        """
    Helper to instantiate gslib.WildcardIterator. Args are same as
    gslib.WildcardIterator interface, but this method fills in most of the
    values from instance state.

    Args:
      uri_or_str: StorageUri or URI string naming wildcard objects to iterate.
    """
        return wildcard_iterator.wildcard_iterator(
            uri_or_str,
            self.proj_id_handler,
            bucket_storage_uri_class=self.bucket_storage_uri_class,
            headers=self.headers,
            debug=self.debug,
            all_versions=self.all_versions)
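The three numbered steps in __iter__ can be condensed into a toy model over an in-memory listing (hypothetical code, not part of gsutil), which shows why 'gs://bucket/abcd' expands to the objects under abcd while a plain object name passes through unchanged:

    OBJECTS = ['abcd/o1.txt', 'abcd/o2.txt', 'xyz/o1.txt', 'xyz/o2.txt']

    def expand(name):
        # Steps 2-3 collapsed: a name that prefixes other objects acts as an
        # implicit container and recurses (flat '**' behavior); otherwise the
        # literal name is yielded as-is.
        if not name:
            return OBJECTS                      # like listing 'gs://bucket'
        matches = [o for o in OBJECTS if o.startswith(name + '/')]
        return matches or [name]

    assert expand('') == OBJECTS
    assert expand('abcd') == ['abcd/o1.txt', 'abcd/o2.txt']
    assert expand('abcd/o1.txt') == ['abcd/o1.txt']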
Code Example #5
class Command(object):
    REQUIRED_SPEC_KEYS = [COMMAND_NAME]

    # Each subclass must define the following map, minimally including the
    # keys in REQUIRED_SPEC_KEYS; other values below will be used as defaults,
    # although for readability subclasses should specify the complete map.
    command_spec = {
        # Name of command.
        COMMAND_NAME: None,
        # List of command name aliases.
        COMMAND_NAME_ALIASES: [],
        # Min number of args required by this command.
        MIN_ARGS: 0,
        # Max number of args allowed by this command, or NO_MAX.
        MAX_ARGS: NO_MAX,
        # Getopt-style string specifying acceptable sub args.
        SUPPORTED_SUB_ARGS: '',
        # True if file URIs are acceptable for this command.
        FILE_URIS_OK: False,
        # True if provider-only URIs are acceptable for this command.
        PROVIDER_URIS_OK: False,
        # Index in args of first URI arg.
        URIS_START_ARG: 0,
        # True if gsutil must be configured before running this command.
        CONFIG_REQUIRED: True,
    }
    _default_command_spec = command_spec
    help_spec = HelpProvider.help_spec
    """Define an empty test specification, which derived classes must populate.

  This is a list of tuples containing the following values:

    step_name - mnemonic name for test, displayed when test is run
    cmd_line - shell command line to run test
    expect_ret or None - expected return code from test (None means ignore)
    (result_file, expect_file) or None - tuple of result file and expected
                                         file to diff for additional test
                                         verification beyond the return code
                                         (None means no diff requested)
  Notes:

  - Setting expect_ret to None means there is no expectation and,
    hence, any returned value will pass.

  - Any occurrences of the string 'gsutil' in the cmd_line parameter
    are expanded to the full path to the gsutil command under test.

  - The cmd_line, result_file and expect_file parameters may
    contain the following special substrings:

    $Bn - converted to one of 10 unique-for-testing bucket names (n=0..9)
    $On - converted to one of 10 unique-for-testing object names (n=0..9)
    $Fn - converted to one of 10 unique-for-testing file names (n=0..9)
    $G  - converted to the directory where gsutil is installed. Useful for
          referencing test data.

  - The generated file names are full pathnames, whereas the generated
    bucket and object names are simple relative names.

  - Tests with a non-None result_file and expect_file automatically
    trigger an implicit diff of the two files.

  - These test specifications, in combination with the conversion strings,
    allow tests to be constructed parametrically. For example, here's an
    annotated subset of a test_steps for the cp command:

    # Copy local file to object, verify 0 return code.
    ('simple cp', 'gsutil cp $F1 gs://$B1/$O1', 0, None, None),
    # Copy uploaded object back to local file and diff vs. orig file.
    ('verify cp', 'gsutil cp gs://$B1/$O1 $F2', 0, '$F2', '$F1'),

  - After pattern substitution, the specs are run sequentially, in the
    order in which they appear in the test_steps list.
  """
    test_steps = []

    # Define a convenience property for command name, since it's used in many places.
    def _GetDefaultCommandName(self):
        return self.command_spec[COMMAND_NAME]

    command_name = property(_GetDefaultCommandName)

    def __init__(self,
                 command_runner,
                 args,
                 headers,
                 debug,
                 parallel_operations,
                 config_file_list,
                 bucket_storage_uri_class,
                 test_method=None,
                 logging_filters=None):
        """
    Args:
      command_runner: CommandRunner (for commands built atop other commands).
      args: Command-line args (arg0 = actual arg, not command name ala bash).
      headers: Dictionary containing optional HTTP headers to pass to boto.
      debug: Debug level to pass in to boto connection (range 0..3).
      parallel_operations: Should command operations be executed in parallel?
      config_file_list: Config file list returned by GetBotoConfigFileList().
      bucket_storage_uri_class: Class to instantiate for cloud StorageUris.
                                Settable for testing/mocking.
      test_method: Optional general purpose method for testing purposes.
                   Application and semantics of this method will vary by
                   command and test type.
      logging_filters: Optional list of logging.Filters to apply to this
                       command's logger.

    Implementation note: subclasses shouldn't need to define an __init__
    method, and should instead depend on the shared initialization that
    happens here. If you do define an __init__ method in a subclass, you'll
    need to explicitly call super().__init__(). But you're encouraged not to
    do this, because it will make changing the __init__ interface more painful.
    """
        # Save class values from constructor params.
        self.command_runner = command_runner
        self.args = args
        self.unparsed_args = args
        self.headers = headers
        self.debug = debug
        self.parallel_operations = parallel_operations
        self.config_file_list = config_file_list
        self.bucket_storage_uri_class = bucket_storage_uri_class
        self.test_method = test_method
        self.exclude_symlinks = False
        self.recursion_requested = False
        self.all_versions = False

        # Global instance of a threaded logger object.
        self.logger = _ThreadedLogger(self.command_name)
        if logging_filters:
            for log_filter in logging_filters:
                self.logger.addFilter(log_filter)

        # Process sub-command instance specifications.
        # First, ensure subclass implementation sets all required keys.
        for k in self.REQUIRED_SPEC_KEYS:
            if k not in self.command_spec or self.command_spec[k] is None:
                raise CommandException(
                    '"%s" command implementation is missing %s '
                    'specification' % (self.command_name, k))
        # Now override default command_spec with subclass-specified values.
        tmp = self._default_command_spec
        tmp.update(self.command_spec)
        self.command_spec = tmp
        del tmp

        # Make sure command provides a test specification.
        if not self.test_steps:
            # TODO: Uncomment following lines when test feature is ready.
            #raise CommandException('"%s" command implementation is missing test '
            #'specification' % self.command_name)
            pass

        # Parse and validate args.
        try:
            (self.sub_opts,
             self.args) = getopt.getopt(args,
                                        self.command_spec[SUPPORTED_SUB_ARGS])
        except GetoptError, e:
            raise CommandException('%s for "%s" command.' %
                                   (e.msg, self.command_name))
        if (len(self.args) < self.command_spec[MIN_ARGS]
                or len(self.args) > self.command_spec[MAX_ARGS]):
            raise CommandException(
                'Wrong number of arguments for "%s" command.' %
                self.command_name)
        if (not self.command_spec[FILE_URIS_OK] and self.HaveFileUris(
                self.args[self.command_spec[URIS_START_ARG]:])):
            raise CommandException(
                '"%s" command does not support "file://" URIs. '
                'Did you mean to use a gs:// URI?' % self.command_name)
        if (not self.command_spec[PROVIDER_URIS_OK] and self._HaveProviderUris(
                self.args[self.command_spec[URIS_START_ARG]:])):
            raise CommandException(
                '"%s" command does not support provider-only '
                'URIs.' % self.command_name)
        if self.command_spec[CONFIG_REQUIRED]:
            self._ConfigureNoOpAuthIfNeeded()

        self.proj_id_handler = ProjectIdHandler()
        self.suri_builder = StorageUriBuilder(debug, bucket_storage_uri_class)

        # Cross-platform path to run gsutil binary.
        self.gsutil_cmd = ''
        # Cross-platform list containing gsutil path for use with subprocess.
        self.gsutil_exec_list = []
        # If running on Windows, invoke python interpreter explicitly.
        if gslib.util.IS_WINDOWS:
            self.gsutil_cmd += 'python '
            self.gsutil_exec_list += ['python']
        # Add full path to gsutil to make sure we test the correct version.
        self.gsutil_path = gslib.GSUTIL_PATH
        self.gsutil_cmd += self.gsutil_path
        self.gsutil_exec_list += [self.gsutil_path]

        # We're treating recursion_requested like it's used by all commands, but
        # only some of the commands accept the -R option.
        if self.sub_opts:
            for o, unused_a in self.sub_opts:
                if o == '-r' or o == '-R':
                    self.recursion_requested = True
                    break
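The sub-arg parsing and -R handling above are standard getopt. A small sketch with a hypothetical SUPPORTED_SUB_ARGS value of 'rR' (boolean flags only; a trailing ':' would mark a flag that takes a value):

    import getopt

    sub_opts, args = getopt.getopt(['-R', 'gs://bucket', 'dst'], 'rR')
    # sub_opts == [('-R', '')], args == ['gs://bucket', 'dst']
    recursion_requested = any(o in ('-r', '-R') for o, unused_a in sub_opts)
    assert recursion_requested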
Code Example #6
File: test_cp.py Project: Hex29A/gsutil
    def test_filter_existing_components_versioned(self):
        suri_builder = StorageUriBuilder(0, BucketStorageUri)
        bucket_uri = self.CreateVersionedBucket()
        tracker_file = self.CreateTempFile(file_name='foo', contents='asdf')
        tracker_file_lock = CreateLock()

        # Already uploaded, contents still match, component still used.
        fpath_uploaded_correctly = self.CreateTempFile(file_name='foo1',
                                                       contents='1')
        key_uploaded_correctly = self.CreateObject(object_name='foo1',
                                                   contents='1',
                                                   bucket_uri=bucket_uri)
        args_uploaded_correctly = PerformResumableUploadIfAppliesArgs(
            fpath_uploaded_correctly, 0, 1, fpath_uploaded_correctly,
            key_uploaded_correctly, key_uploaded_correctly.generation, {},
            tracker_file, tracker_file_lock)

        # Duplicate object name in tracker file, but uploaded correctly.
        fpath_duplicate = fpath_uploaded_correctly
        key_duplicate = self.CreateObject(object_name='foo1',
                                          contents='1',
                                          bucket_uri=bucket_uri)
        args_duplicate = PerformResumableUploadIfAppliesArgs(
            fpath_duplicate, 0, 1, fpath_duplicate, key_duplicate,
            key_duplicate.generation, {}, tracker_file, tracker_file_lock)
        object_name_duplicate = ObjectFromTracker(
            fpath_duplicate, key_duplicate.generation).object_name
        uri_duplicate = MakeGsUri(bucket_uri.bucket_name,
                                  object_name_duplicate, suri_builder)
        uri_duplicate.generation = args_duplicate.dst_uri.generation

        # Already uploaded, but contents no longer match.
        fpath_wrong_contents = self.CreateTempFile(file_name='foo4',
                                                   contents='4')
        key_wrong_contents = self.CreateObject(object_name='foo4',
                                               contents='_',
                                               bucket_uri=bucket_uri)
        args_wrong_contents = PerformResumableUploadIfAppliesArgs(
            fpath_wrong_contents, 0, 1, fpath_wrong_contents,
            key_wrong_contents, key_wrong_contents.generation, {},
            tracker_file, tracker_file_lock)

        dst_args = {
            fpath_uploaded_correctly: args_uploaded_correctly,
            fpath_wrong_contents: args_wrong_contents
        }

        existing_components = [
            ObjectFromTracker(fpath_uploaded_correctly,
                              key_uploaded_correctly.generation),
            ObjectFromTracker(fpath_duplicate, key_duplicate.generation),
            ObjectFromTracker(fpath_wrong_contents,
                              key_wrong_contents.generation)
        ]

        (components_to_upload, uploaded_components,
         existing_objects_to_delete) = (FilterExistingComponents(
             dst_args, existing_components, bucket_uri.bucket_name,
             suri_builder))

        self.assertEqual([args_wrong_contents], components_to_upload)
        self.assertEqual(str([args_uploaded_correctly.dst_uri]),
                         str(uploaded_components))
        expected_to_delete = [(args_wrong_contents.dst_uri.object_name,
                               args_wrong_contents.dst_uri.generation),
                              (uri_duplicate.object_name,
                               args_duplicate.dst_uri.generation)]
        for uri in existing_objects_to_delete:
            self.assertTrue((uri.object_name,
                             uri.generation) in expected_to_delete)
        self.assertEqual(len(expected_to_delete),
                         len(existing_objects_to_delete))
Code Example #7
File: test_cp.py Project: Hex29A/gsutil
    def test_filter_existing_components_non_versioned(self):
        bucket_uri = self.CreateBucket()
        tracker_file = self.CreateTempFile(file_name='foo', contents='asdf')
        tracker_file_lock = CreateLock()

        # Already uploaded, contents still match, component still used.
        fpath_uploaded_correctly = self.CreateTempFile(file_name='foo1',
                                                       contents='1')
        key_uploaded_correctly = self.CreateObject(object_name='foo1',
                                                   contents='1',
                                                   bucket_uri=bucket_uri)
        args_uploaded_correctly = PerformResumableUploadIfAppliesArgs(
            fpath_uploaded_correctly, 0, 1, fpath_uploaded_correctly,
            key_uploaded_correctly, '', {}, tracker_file, tracker_file_lock)

        # Not yet uploaded, but needed.
        fpath_not_uploaded = self.CreateTempFile(file_name='foo2',
                                                 contents='2')
        key_not_uploaded = self.CreateObject(object_name='foo2',
                                             contents='2',
                                             bucket_uri=bucket_uri)
        args_not_uploaded = PerformResumableUploadIfAppliesArgs(
            fpath_not_uploaded, 0, 1, fpath_not_uploaded, key_not_uploaded, '',
            {}, tracker_file, tracker_file_lock)

        # Already uploaded, but contents no longer match. Even though the contents
        # differ, we don't delete this since the bucket is not versioned and it
        # will be overwritten anyway.
        fpath_wrong_contents = self.CreateTempFile(file_name='foo4',
                                                   contents='4')
        key_wrong_contents = self.CreateObject(object_name='foo4',
                                               contents='_',
                                               bucket_uri=bucket_uri)
        args_wrong_contents = PerformResumableUploadIfAppliesArgs(
            fpath_wrong_contents, 0, 1, fpath_wrong_contents,
            key_wrong_contents, '', {}, tracker_file, tracker_file_lock)

        # Exists in tracker file, but component object no longer exists.
        fpath_remote_deleted = self.CreateTempFile(file_name='foo5',
                                                   contents='5')
        args_remote_deleted = PerformResumableUploadIfAppliesArgs(
            fpath_remote_deleted, 0, 1, fpath_remote_deleted, '', '', {},
            tracker_file, tracker_file_lock)

        # Exists in tracker file and already uploaded, but no longer needed.
        fpath_no_longer_used = self.CreateTempFile(file_name='foo6',
                                                   contents='6')
        key_no_longer_used = self.CreateObject(object_name='foo6',
                                               contents='6',
                                               bucket_uri=bucket_uri)

        dst_args = {
            fpath_uploaded_correctly: args_uploaded_correctly,
            fpath_not_uploaded: args_not_uploaded,
            fpath_wrong_contents: args_wrong_contents,
            fpath_remote_deleted: args_remote_deleted
        }

        existing_components = [
            ObjectFromTracker(fpath_uploaded_correctly, ''),
            ObjectFromTracker(fpath_wrong_contents, ''),
            ObjectFromTracker(fpath_remote_deleted, ''),
            ObjectFromTracker(fpath_no_longer_used, '')
        ]

        suri_builder = StorageUriBuilder(0, BucketStorageUri)

        (components_to_upload, uploaded_components,
         existing_objects_to_delete) = (FilterExistingComponents(
             dst_args, existing_components, bucket_uri.bucket_name,
             suri_builder))

        for arg in [
                args_not_uploaded, args_wrong_contents, args_remote_deleted
        ]:
            self.assertTrue(arg in components_to_upload)
        self.assertEqual(str([args_uploaded_correctly.dst_uri]),
                         str(uploaded_components))
        self.assertEqual(
            str([
                MakeGsUri(bucket_uri.bucket_name, fpath_no_longer_used,
                          suri_builder)
            ]), str(existing_objects_to_delete))
Code Example #8
class NameExpansionHandler(object):
    def __init__(self, command_name, proj_id_handler, headers, debug,
                 bucket_storage_uri_class):
        """
    Args:
      command_name: name of command being run.
      proj_id_handler: ProjectIdHandler to use for current command.
      headers: Dictionary containing optional HTTP headers to pass to boto.
      debug: Debug level to pass in to boto connection (range 0..3).
      bucket_storage_uri_class: Class to instantiate for cloud StorageUris.
          Settable for testing/mocking.
    """
        self.command_name = command_name
        self.proj_id_handler = proj_id_handler
        self.headers = headers
        self.debug = debug
        self.bucket_storage_uri_class = bucket_storage_uri_class
        self.suri_builder = StorageUriBuilder(debug, bucket_storage_uri_class)

        # Map holding wildcard strings to use for flat vs subdir-by-subdir listings.
        # (A flat listing means show all objects expanded all the way down.)
        self._flatness_wildcard = {True: '**', False: '*'}

    def WildcardIterator(self, uri_or_str):
        """
    Helper to instantiate gslib.WildcardIterator. Args are same as
    gslib.WildcardIterator interface, but this method fills in most of the
    values from class state.

    Args:
      uri_or_str: StorageUri or URI string naming wildcard objects to iterate.
    """
        return wildcard_iterator.wildcard_iterator(
            uri_or_str,
            self.proj_id_handler,
            bucket_storage_uri_class=self.bucket_storage_uri_class,
            headers=self.headers,
            debug=self.debug)

    def ExpandWildcardsAndContainers(self,
                                     uri_strs,
                                     recursion_requested,
                                     flat=True):
        """
    Expands wildcards, object-less bucket names, subdir bucket names, and
    directory names, producing a flat listing of all the matching objects/files.

    Args:
      uri_strs: List of URI strings needing expansion.
      recursion_requested: True if -R specified on command-line.
      flat: Bool indicating whether bucket listings should be flattened, i.e.,
          so the mapped-to results contain objects spanning subdirectories.

    Returns:
      gslib.name_expansion.NameExpansionResult.

    Raises:
      CommandException: if errors encountered.

    Examples with flat=True:
      - Calling with one of the uri_strs being 'gs://bucket' will enumerate all
        top-level objects, as will 'gs://bucket/' and 'gs://bucket/*'.
      - 'gs://bucket/**' will enumerate all objects in the bucket.
      - 'gs://bucket/abc' will enumerate all next-level objects under directory
        abc (i.e., not including subdirectories of abc) if gs://bucket/abc/*
        matches any objects; otherwise it will enumerate the single name
        gs://bucket/abc.
      - 'gs://bucket/abc/**' will enumerate all objects under abc or any of its
        subdirectories.
      - 'file:///tmp' will enumerate all files under /tmp, as will
        'file:///tmp/*'.
      - 'file:///tmp/**' will enumerate all files under /tmp or any of its
        subdirectories.

    Example if flat=False: calling with gs://bucket/abc/* lists matching objects
    or subdirs, but not sub-subdirs or objects beneath subdirs.

    Note: In step-by-step comments below we give examples assuming there's a
    gs://bucket with object paths:
      abcd/o1.txt
      abcd/o2.txt
      xyz/o1.txt
      xyz/o2.txt
    and a directory file://dir with file paths:
      dir/a.txt
      dir/b.txt
      dir/c/
    """
        result = NameExpansionResult()
        for uri_str in uri_strs:

            # Step 1: Expand any explicitly specified wildcards.
            # Starting with gs://buck*/abc* this step would expand to gs://bucket/abcd
            if ContainsWildcard(uri_str):
                post_step1_bucket_listing_refs = list(
                    self.WildcardIterator(uri_str))
            else:
                post_step1_bucket_listing_refs = [
                    BucketListingRef(self.suri_builder.StorageUri(uri_str))
                ]

            # Step 2: Expand subdirs.
            # Starting with gs://bucket/abcd this step would expand to:
            #   [abcd/o1.txt, abcd/o2.txt].
            uri_names_container = False
            if flat and recursion_requested:
                post_step2_bucket_listing_refs = []
                for bucket_listing_ref in post_step1_bucket_listing_refs:
                    (uri_names_container, bucket_listing_refs) = (
                        self._DoImplicitBucketSubdirExpansionIfApplicable(
                            bucket_listing_ref.GetUri(), flat))
                    post_step2_bucket_listing_refs.extend(bucket_listing_refs)
            else:
                post_step2_bucket_listing_refs = post_step1_bucket_listing_refs

            # Step 3. Expand directories and buckets.
            # Starting with gs://bucket this step would expand to:
            #  [abcd/o1.txt, abcd/o2.txt, xyz/o1.txt, xyz/o2.txt]
            # Starting with file://dir this step would expand to:
            #  [dir/a.txt, dir/b.txt, dir/c/]
            exp_src_bucket_listing_refs = []
            wc = self._flatness_wildcard[flat]
            for bucket_listing_ref in post_step2_bucket_listing_refs:
                if (not bucket_listing_ref.GetUri().names_container()
                        and (flat or not bucket_listing_ref.HasPrefix())):
                    exp_src_bucket_listing_refs.append(bucket_listing_ref)
                    continue
                if not recursion_requested:
                    if bucket_listing_ref.GetUri().is_file_uri():
                        desc = 'directory'
                    else:
                        desc = 'bucket'
                    print 'Omitting %s "%s". (Did you mean to do %s -R?)' % (
                        desc, bucket_listing_ref.GetUri(), self.command_name)
                    continue
                uri_names_container = True
                if bucket_listing_ref.GetUri().is_file_uri():
                    # Convert dir to implicit recursive wildcard.
                    uri_to_iter = '%s/%s' % (bucket_listing_ref.GetUriString(),
                                             wc)
                else:
                    # Convert bucket to implicit recursive wildcard.
                    uri_to_iter = bucket_listing_ref.GetUri(
                    ).clone_replace_name(wc)
                wildcard_result = list(self.WildcardIterator(uri_to_iter))
                if len(wildcard_result) > 0:
                    exp_src_bucket_listing_refs.extend(wildcard_result)

            result._AddExpansion(self.suri_builder.StorageUri(uri_str),
                                 uri_names_container,
                                 exp_src_bucket_listing_refs)

        return result

    def _DoImplicitBucketSubdirExpansionIfApplicable(self, uri, flat):
        """
    Checks whether uri could be an implicit bucket subdir and expands it if so;
    otherwise returns a list containing uri. For example, gs://abc would be an
    implicit bucket subdir if the -R option was specified and gs://abc/*
    matches anything.
    Can only be called when -R (recursion) was requested.

    Args:
      uri: StorageUri.
      flat: bool indicating whether bucket listings should be flattened, i.e.,
          so the mapped-to results contain objects spanning subdirectories.

    Returns:
      tuple (names_container, [BucketListingRefs to which uri expanded])
        where names_container is true if the URI names a directory, bucket,
        or bucket subdir (unlike StorageUri.names_container(), which doesn't
        handle the latter case).
    """
        names_container = False
        result_list = []
        if uri.names_object():
            # URI could be a bucket subdir.
            implicit_subdir_matches = list(
                self.WildcardIterator(
                    self.suri_builder.StorageUri(
                        '%s/%s' %
                        (uri.uri.rstrip('/'), self._flatness_wildcard[flat]))))
            if len(implicit_subdir_matches) > 0:
                names_container = True
                result_list.extend(implicit_subdir_matches)
            else:
                result_list.append(BucketListingRef(uri))
        else:
            result_list.append(BucketListingRef(uri))
        return (names_container, result_list)

    def StorageUri(self, uri_str):
        """
    Helper to instantiate boto.StorageUri with gsutil default flag values.
    Uses self.bucket_storage_uri_class to support mocking/testing.
    (Identical to the same-named function in command.py; that and this
    copy make it convenient to call StorageUri() with a single argument,
    from the respective classes.)

    Args:
      uri_str: URI string naming a bucket + optional object.

    Returns:
      boto.StorageUri for given uri_str.

    Raises:
      InvalidUriError: if uri_str not valid.
    """
        return gslib.util.StorageUri(uri_str, self.bucket_storage_uri_class,
                                     self.debug)
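A standalone model (illustrative only, not gsutil code) of the decision inside _DoImplicitBucketSubdirExpansionIfApplicable: a name is treated as a container only when appending the wildcard matches something; otherwise the literal name is kept.

    # In-memory stand-in for a bucket listing; the names are hypothetical.
    LISTING = ['abc/o1.txt', 'abc/o2.txt', 'top.txt']

    def expand_implicit_subdir(name):
        # Mirrors the check above: does '<name>/**' match any objects?
        matches = [o for o in LISTING if o.startswith(name.rstrip('/') + '/')]
        if matches:
            return (True, matches)    # names_container=True, expanded refs
        return (False, [name])        # fall back to the literal name

    assert expand_implicit_subdir('abc') == (True, ['abc/o1.txt', 'abc/o2.txt'])
    assert expand_implicit_subdir('top.txt') == (False, ['top.txt'])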
Code Example #9
    def RunCommand(self):

        if gslib.IS_PACKAGE_INSTALL:
            raise CommandException(
                'Update command is only available for gsutil installed from a '
                'tarball. If you installed gsutil via another method, use the same '
                'method to update it.')

        is_secure = BOTO_IS_SECURE
        if not is_secure[0]:
            raise CommandException(
                'Your boto configuration has %s = False. The update command\n'
                'cannot be run this way, for security reasons.' % is_secure[1])

        self._DisallowUpdataIfDataInGsutilDir()

        force_update = False
        no_prompt = False
        if self.sub_opts:
            for o, unused_a in self.sub_opts:
                if o == '-f':
                    force_update = True
                if o == '-n':
                    no_prompt = True

        dirs_to_remove = []
        tmp_dir = tempfile.mkdtemp()
        dirs_to_remove.append(tmp_dir)
        os.chdir(tmp_dir)

        if not no_prompt:
            self.logger.info('Checking for software update...')
        if self.args:
            update_from_uri_str = self.args[0]
            if not update_from_uri_str.endswith('.tar.gz'):
                raise CommandException(
                    'The update command only works with tar.gz files.')
            for i, result in enumerate(
                    self.WildcardIterator(update_from_uri_str)):
                if i > 0:
                    raise CommandException(
                        'Invalid update URI. Must name a single .tar.gz file.')
                if result.uri.names_file():
                    if not force_update:
                        raise CommandException((
                            '"update" command does not support "file://" URIs without the '
                            '-f option.'))
                elif not result.uri.names_object():
                    raise CommandException(
                        'Invalid update object URI. Must name a single .tar.gz file.'
                    )
        else:
            update_from_uri_str = GSUTIL_PUB_TARBALL

        # Try to retrieve version info from tarball metadata; failing that, download
        # the tarball and extract the VERSION file. The version lookup will fail
        # when running the update system test, because it retrieves the tarball from
        # a temp file rather than a cloud URI (files lack the version metadata).
        suri_builder = StorageUriBuilder(self.debug,
                                         self.bucket_storage_uri_class)
        tarball_version = LookUpGsutilVersion(
            suri_builder.StorageUri(update_from_uri_str))
        if tarball_version:
            tf = None
        else:
            tf = self._FetchAndOpenGsutilTarball(update_from_uri_str)
            tf.extractall()
            with open(os.path.join('gsutil', 'VERSION'), 'r') as ver_file:
                tarball_version = ver_file.read().strip()

        if not force_update and gslib.VERSION == tarball_version:
            self._CleanUpUpdateCommand(tf, dirs_to_remove)
            if self.args:
                raise CommandException('You already have %s installed.' %
                                       update_from_uri_str,
                                       informational=True)
            else:
                raise CommandException(
                    'You already have the latest gsutil release '
                    'installed.',
                    informational=True)

        if not no_prompt:
            (g, m) = CompareVersions(tarball_version, gslib.VERSION)
            if m:
                print('\n'.join(
                    textwrap.wrap(
                        'This command will update to the "%s" version of gsutil at %s. '
                        'NOTE: This is a major new version, so it is strongly recommended '
                        'that you review the release note details at %s before updating to '
                        'this version, especially if you use gsutil in scripts.'
                        % (tarball_version, gslib.GSUTIL_DIR,
                           RELEASE_NOTES_URL))))
            else:
                print(
                    'This command will update to the "%s" version of\ngsutil at %s'
                    % (tarball_version, gslib.GSUTIL_DIR))
        self._ExplainIfSudoNeeded(tf, dirs_to_remove)

        if no_prompt:
            answer = 'y'
        else:
            answer = raw_input('Proceed? [y/N] ')
        if not answer or answer.lower()[0] != 'y':
            self._CleanUpUpdateCommand(tf, dirs_to_remove)
            raise CommandException('Not running update.', informational=True)

        if not tf:
            tf = self._FetchAndOpenGsutilTarball(update_from_uri_str)

        # Ignore keyboard interrupts during the update to reduce the chance that
        # someone hitting ^C leaves gsutil in a broken state.
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        # gslib.GSUTIL_DIR names the path where the code should end up (like
        # /usr/local/gsutil), which is one level below where the tarball's relative
        # paths land (the tarball creates files under ./gsutil). So, we need to
        # extract at the parent directory level.
        gsutil_bin_parent_dir = os.path.normpath(
            os.path.join(gslib.GSUTIL_DIR, '..'))

        # Create temporary directories as siblings of GSUTIL_DIR; the new
        # version is extracted into new_dir below.
        old_dir = tempfile.mkdtemp(dir=gsutil_bin_parent_dir)
        new_dir = tempfile.mkdtemp(dir=gsutil_bin_parent_dir)
        dirs_to_remove.append(old_dir)
        dirs_to_remove.append(new_dir)
        self._EnsureDirsSafeForUpdate(dirs_to_remove)
        try:
            tf.extractall(path=new_dir)
        except Exception, e:
            self._CleanUpUpdateCommand(tf, dirs_to_remove)
            raise CommandException('Update failed: %s.' % e)
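The extract-at-parent-level dance the comments describe can be sketched in isolation (the paths here are hypothetical stand-ins, not gsutil's actual values):

    import os
    import tempfile

    # Hypothetical stand-in for gslib.GSUTIL_DIR:
    gsutil_dir = os.path.join(tempfile.gettempdir(), 'gsutil')
    gsutil_bin_parent_dir = os.path.normpath(os.path.join(gsutil_dir, '..'))
    # Staging directories are created as siblings of the install dir; the
    # tarball (whose members live under ./gsutil) is then extracted into
    # new_dir, as in the try block above:
    new_dir = tempfile.mkdtemp(dir=gsutil_bin_parent_dir)
    # tarfile.open('gsutil.tar.gz').extractall(path=new_dir)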
Code Example #10
    def _MaybeCheckForAndOfferSoftwareUpdate(self, command_name, debug):
        """Checks the last time we checked for an update, and if it's been longer
       than the configured threshold offers the user to update gsutil.

      Args:
        command_name: The name of the command being run.
        debug: Debug level to pass in to boto connection (range 0..3).

      Returns:
        True if the user decides to update.
    """
        # Don't try to interact with user if:
        # - gsutil is not connected to a tty (e.g., if being run from cron);
        # - user is running gsutil -q
        # - user is running the update command (which could otherwise cause an
        #   additional note that an update is available when user is already trying
        #   to perform an update);
        # - user doesn't have credentials configured; or,
        # - user specified gs_host (which could be a different, non-production
        #   service instance, in which case credentials won't work for checking
        #   the gsutil tarball).
        gs_host = boto.config.get('Credentials', 'gs_host', None)
        if (not sys.stdout.isatty() or not sys.stderr.isatty()
                or not sys.stdin.isatty() or command_name == 'update'
                or not logging.getLogger().isEnabledFor(logging.INFO)
                or not HasConfiguredCredentials() or gs_host):
            return False

        software_update_check_period = boto.config.getint(
            'GSUtil', 'software_update_check_period', 30)
        # Setting software_update_check_period to 0 means periodic software
        # update checking is disabled.
        if software_update_check_period == 0:
            return False

        cur_ts = int(time.time())
        if not os.path.isfile(LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE):
            # Set last_checked_ts from date of VERSION file, so if the user installed
            # an old copy of gsutil it will get noticed (and an update offered) the
            # first time they try to run it.
            last_checked_ts = int(os.path.getmtime(gslib.VERSION_FILE))
            with open(LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE, 'w') as f:
                f.write(str(last_checked_ts))
        else:
            with open(LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE, 'r') as f:
                last_checked_ts = int(f.readline())

        if (cur_ts - last_checked_ts >
                software_update_check_period * SECONDS_PER_DAY):
            suri_builder = StorageUriBuilder(debug,
                                             self.bucket_storage_uri_class)
            cur_ver = LookUpGsutilVersion(
                suri_builder.StorageUri(GSUTIL_PUB_TARBALL))
            with open(LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE, 'w') as f:
                f.write(str(cur_ts))
            if gslib.VERSION != cur_ver:
                print '\n'.join(
                    textwrap.wrap(
                        'A newer version of gsutil (%s) is available than the version you '
                        'are running (%s). A detailed log of gsutil release changes is '
                        'available at gs://pub/gsutil_ReleaseNotes.txt if you would like '
                        'to read them before updating.' %
                        (cur_ver, gslib.VERSION),
                        width=78))
                if gslib.IS_PACKAGE_INSTALL:
                    return False
                print
                answer = raw_input('Would you like to update [Y/n]? ')
                return not answer or answer.lower()[0] != 'n'
        return False
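The period check above reduces to a simple timestamp comparison. A self-contained model (the names are hypothetical, not gsutil's):

    import time

    SECONDS_PER_DAY = 60 * 60 * 24

    def update_check_due(last_checked_ts, period_days, now=None):
        if period_days == 0:    # 0 disables periodic update checks entirely
            return False
        if now is None:
            now = int(time.time())
        return now - last_checked_ts > period_days * SECONDS_PER_DAY

    assert update_check_due(0, 30, now=31 * SECONDS_PER_DAY)
    assert not update_check_due(0, 30, now=29 * SECONDS_PER_DAY)
    assert not update_check_due(0, 0, now=10 ** 9)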
Code Example #11
    def _MaybeCheckForAndOfferSoftwareUpdate(self, command_name, debug):
        """Checks the last time we checked for an update, and if it's been longer
       than the configured threshold offers the user to update gsutil.

      Args:
        command_name: The name of the command being run.
        debug: Debug level to pass in to boto connection (range 0..3).

      Returns:
        True if the user decides to update.
    """
        # Don't try to interact with user if:
        # - gsutil is not connected to a tty (e.g., if being run from cron);
        # - user is running gsutil -q
        # - user is running the config command (which could otherwise attempt to
        #   check for an update for a user running behind a proxy, who has not yet
        #   configured gsutil to go through the proxy; for such users we need the
        #   first connection attempt to be made by the gsutil config command).
        # - user is running the version command (which gets run when using
        #   gsutil -D, which would prevent users with proxy config problems from
        #   sending us gsutil -D output).
        # - user is running the update command (which could otherwise cause an
        #   additional note that an update is available when user is already trying
        #   to perform an update);
        # - user specified gs_host (which could be a different, non-production
        #   service instance, in which case credentials won't work for checking
        #   the gsutil tarball).
        gs_host = boto.config.get('Credentials', 'gs_host', None)
        if (not IsRunningInteractively()
                or command_name in ('config', 'update', 'ver', 'version')
                or not logging.getLogger().isEnabledFor(logging.INFO)
                or gs_host):
            return False

        software_update_check_period = boto.config.getint(
            'GSUtil', 'software_update_check_period', 30)
        # Setting software_update_check_period to 0 means periodic software
        # update checking is disabled.
        if software_update_check_period == 0:
            return False

        cur_ts = int(time.time())
        if not os.path.isfile(LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE):
            # Set last_checked_ts from date of VERSION file, so if the user installed
            # an old copy of gsutil it will get noticed (and an update offered) the
            # first time they try to run it.
            last_checked_ts = GetGsutilVersionModifiedTime()
            with open(LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE, 'w') as f:
                f.write(str(last_checked_ts))
        else:
            try:
                with open(LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE,
                          'r') as f:
                    last_checked_ts = int(f.readline())
            except (TypeError, ValueError):
                return False

        if (cur_ts - last_checked_ts >
                software_update_check_period * SECONDS_PER_DAY):
            suri_builder = StorageUriBuilder(debug,
                                             self.bucket_storage_uri_class)
            cur_ver = LookUpGsutilVersion(
                suri_builder.StorageUri(GSUTIL_PUB_TARBALL))
            with open(LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE, 'w') as f:
                f.write(str(cur_ts))
            (g, m) = CompareVersions(cur_ver, gslib.VERSION)
            if m:
                print '\n'.join(
                    textwrap.wrap(
                        'A newer version of gsutil (%s) is available than the version you '
                        'are running (%s). NOTE: This is a major new version, so it is '
                        'strongly recommended that you review the release note details at %s '
                        'before updating to this version, especially if you use gsutil in '
                        'scripts.' %
                        (cur_ver, gslib.VERSION, RELEASE_NOTES_URL)))
                if gslib.IS_PACKAGE_INSTALL:
                    return False
                print
                answer = raw_input('Would you like to update [y/N]? ')
                return answer and answer.lower()[0] == 'y'
            elif g:
                print '\n'.join(
                    textwrap.wrap(
                        'A newer version of gsutil (%s) is available than the version you '
                        'are running (%s). A detailed log of gsutil release changes is '
                        'available at %s if you would like to read them before updating.'
                        % (cur_ver, gslib.VERSION, RELEASE_NOTES_URL)))
                if gslib.IS_PACKAGE_INSTALL:
                    return False
                print
                answer = raw_input('Would you like to update [Y/n]? ')
                return not answer or answer.lower()[0] != 'n'
        return False
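Note the asymmetric prompt defaults in this version: a major update asks '[y/N]' (default No), a minor one asks '[Y/n]' (default Yes). A tiny standalone model (hypothetical helper, not gsutil code):

    def wants_update(answer, major):
        if major:
            # '[y/N]': the empty default answer means No.
            return bool(answer) and answer.lower()[0] == 'y'
        # '[Y/n]': the empty default answer means Yes.
        return not answer or answer.lower()[0] != 'n'

    assert not wants_update('', major=True)
    assert wants_update('y', major=True)
    assert wants_update('', major=False)
    assert not wants_update('no', major=False)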