def testWarnIfMvEarlyDeletionChargeApplies(self):
    """Tests that WarnIfEarlyDeletionChargeApplies warns when appropriate."""
    test_logger = logging.Logger('test')
    src_url = StorageUrlFromString('gs://bucket/object')

    # Recent nearline objects should generate a warning.
    for object_time_created in (self._PI_DAY, self._PI_DAY -
                                datetime.timedelta(days=29, hours=23)):
      recent_nearline_obj = apitools_messages.Object(
          storageClass='NEARLINE', timeCreated=object_time_created)

      with mock.patch.object(test_logger, 'warn') as mocked_warn:
        WarnIfMvEarlyDeletionChargeApplies(src_url, recent_nearline_obj,
                                           test_logger)
        mocked_warn.assert_called_with(
            'Warning: moving %s object %s may incur an early deletion '
            'charge, because the original object is less than %s days old '
            'according to the local system time.', 'nearline',
            src_url.url_string, 30)

    # Recent coldline objects should generate a warning.
    for object_time_created in (self._PI_DAY, self._PI_DAY -
                                datetime.timedelta(days=89, hours=23)):
      recent_coldline_obj = apitools_messages.Object(
          storageClass='COLDLINE', timeCreated=object_time_created)

      with mock.patch.object(test_logger, 'warn') as mocked_warn:
        WarnIfMvEarlyDeletionChargeApplies(src_url, recent_coldline_obj,
                                           test_logger)
        mocked_warn.assert_called_with(
            'Warning: moving %s object %s may incur an early deletion '
            'charge, because the original object is less than %s days old '
            'according to the local system time.', 'coldline',
            src_url.url_string, 90)

    # Recent archive objects should generate a warning.
    for object_time_created in (self._PI_DAY, self._PI_DAY -
                                datetime.timedelta(days=364, hours=23)):
      recent_archive_obj = apitools_messages.Object(
          storageClass='ARCHIVE', timeCreated=object_time_created)

      with mock.patch.object(test_logger, 'warn') as mocked_warn:
        WarnIfMvEarlyDeletionChargeApplies(src_url, recent_archive_obj,
                                           test_logger)
        mocked_warn.assert_called_with(
            'Warning: moving %s object %s may incur an early deletion '
            'charge, because the original object is less than %s days old '
            'according to the local system time.', 'archive',
            src_url.url_string, 365)

    # Sufficiently old objects should not generate a warning.
    with mock.patch.object(test_logger, 'warn') as mocked_warn:
      old_nearline_obj = apitools_messages.Object(
          storageClass='NEARLINE',
          timeCreated=self._PI_DAY - datetime.timedelta(days=30, seconds=1))
      WarnIfMvEarlyDeletionChargeApplies(src_url, old_nearline_obj, test_logger)
      mocked_warn.assert_not_called()
    with mock.patch.object(test_logger, 'warn') as mocked_warn:
      old_coldline_obj = apitools_messages.Object(
          storageClass='COLDLINE',
          timeCreated=self._PI_DAY - datetime.timedelta(days=90, seconds=1))
      WarnIfMvEarlyDeletionChargeApplies(src_url, old_coldline_obj, test_logger)
      mocked_warn.assert_not_called()
    with mock.patch.object(test_logger, 'warn') as mocked_warn:
      old_archive_obj = apitools_messages.Object(
          storageClass='ARCHIVE',
          timeCreated=self._PI_DAY - datetime.timedelta(days=365, seconds=1))
      WarnIfMvEarlyDeletionChargeApplies(src_url, old_archive_obj, test_logger)
      mocked_warn.assert_not_called()

    # Recent standard storage class object should not generate a warning.
    with mock.patch.object(test_logger, 'warn') as mocked_warn:
      recent_standard_obj = apitools_messages.Object(
          storageClass='STANDARD', timeCreated=self._PI_DAY)
      WarnIfMvEarlyDeletionChargeApplies(src_url, recent_standard_obj,
                                         test_logger)
      mocked_warn.assert_not_called()
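The three thresholds exercised above (30, 90, and 365 days) are the minimum storage durations for nearline, coldline, and archive objects. A minimal standalone sketch of the check this test expects, assuming a simple class-to-minimum-age mapping (the real WarnIfMvEarlyDeletionChargeApplies lives in gslib and may differ in detail):

import datetime
import logging

# Hypothetical mapping for illustration; not taken from the gsutil source.
_EARLY_DELETION_MINIMUM_DAYS = {
    'nearline': 30,
    'coldline': 90,
    'archive': 365,
}


def _warn_if_early_deletion_charge_applies(url_string, storage_class,
                                           time_created, logger=None):
  """Warns if the object is younger than its storage class minimum age."""
  logger = logger or logging.getLogger(__name__)
  minimum_days = _EARLY_DELETION_MINIMUM_DAYS.get(storage_class.lower())
  if minimum_days is None:
    return  # STANDARD (and unknown) classes have no early deletion charge.
  age = datetime.datetime.now() - time_created
  if age < datetime.timedelta(days=minimum_days):
    logger.warning(
        'Warning: moving %s object %s may incur an early deletion charge, '
        'because the original object is less than %s days old according to '
        'the local system time.', storage_class.lower(), url_string,
        minimum_days)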
Example #2
  def _PatchIam(self):
    self.continue_on_error = False
    self.recursion_requested = False

    patch_bindings_tuples = []

    if self.sub_opts:
      for o, a in self.sub_opts:
        if o in ['-r', '-R']:
          self.recursion_requested = True
        elif o == '-f':
          self.continue_on_error = True
        elif o == '-d':
          patch_bindings_tuples.append(BindingStringToTuple(False, a))

    patterns = []

    # N.B.: self.sub_opts stops taking in options at the first non-flagged
    # token. The rest of the tokens are sent to self.args. Thus, in order to
    # handle input of the form "-d <binding> <binding> <url>", we will have to
    # parse self.args for a mix of both bindings and CloudUrls. We are not
    # expecting to come across the -r, -f flags here.
    it = iter(self.args)
    for token in it:
      if token == '-d':
        patch_bindings_tuples.append(
            BindingStringToTuple(False, it.next()))
      else:
        try:
          patch_bindings_tuples.append(
              BindingStringToTuple(True, token)
          )
        # All following arguments are urls.
        except (ArgumentException, CommandException):
          patterns.append(token)
          for token in it:
            patterns.append(token)

    # We must have some bindings to process, else this is pointless.
    if not patch_bindings_tuples:
      raise CommandException('Must specify at least one binding.')

    self.everything_set_okay = True
    threaded_wildcards = []
    for pattern in patterns:
      surl = StorageUrlFromString(pattern)
      try:
        if surl.IsBucket():
          if self.recursion_requested:
            surl.object = '*'
            threaded_wildcards.append(surl.url_string)
          else:
            self.PatchIamHelper(surl, patch_bindings_tuples)
        else:
          threaded_wildcards.append(surl.url_string)
      except AttributeError:
        error_msg = 'Invalid Cloud URL "%s".' % surl.object_name
        if set(surl.object_name).issubset(set('-Rrf')):
          error_msg += (
              ' This resource handle looks like a flag, which must appear '
              'before all bindings. See "gsutil help iam ch" for more details.'
          )
        raise CommandException(error_msg)

    if threaded_wildcards:
      name_expansion_iterator = NameExpansionIterator(
          self.command_name, self.debug,
          self.logger, self.gsutil_api,
          threaded_wildcards, self.recursion_requested,
          all_versions=self.all_versions,
          continue_on_error=self.continue_on_error or self.parallel_operations,
          bucket_listing_fields=['name'])

      seek_ahead_iterator = SeekAheadNameExpansionIterator(
          self.command_name, self.debug, self.GetSeekAheadGsutilApi(),
          threaded_wildcards, self.recursion_requested,
          all_versions=self.all_versions)

      # N.B.: Python2.6 support means we can't use a partial function here to
      # curry the bindings tuples into the wrapper function. We instead pass
      # the bindings along by zipping them with each name_expansion_iterator
      # result. See http://bugs.python.org/issue5228.
      serialized_bindings_tuples_it = itertools.repeat(
          [SerializeBindingsTuple(t) for t in patch_bindings_tuples])
      self.Apply(
          _PatchIamWrapper,
          itertools.izip(
              serialized_bindings_tuples_it, name_expansion_iterator),
          _PatchIamExceptionHandler,
          fail_on_error=not self.continue_on_error,
          seek_ahead_iterator=seek_ahead_iterator)

      self.everything_set_okay &= not GetFailureCount() > 0

    # TODO: Add an error counter for files and objects.
    if not self.everything_set_okay:
      raise CommandException('Some IAM policies could not be patched.')
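The token loop above leans on BindingStringToTuple raising ArgumentException or CommandException for the first token that is not a binding, at which point everything left over is treated as a URL. A minimal sketch of the same split with an explicit predicate instead of exception handling (the helper and predicate below are made up for illustration):

def _split_bindings_and_urls(tokens, looks_like_binding):
  """Partitions tokens into (bindings, urls).

  Tokens are treated as bindings until the first non-binding token; that
  token and everything after it are treated as URLs.
  """
  bindings, urls = [], []
  it = iter(tokens)
  for token in it:
    if looks_like_binding(token):
      bindings.append(token)
    else:
      urls.append(token)
      urls.extend(it)  # The rest of the tokens are all URLs.
      break
  return bindings, urls


# Example usage with a crude, illustrative predicate:
bindings, urls = _split_bindings_and_urls(
    ['user:jane@example.com:objectViewer', 'gs://my-bucket/*'],
    lambda token: ':' in token and not token.startswith('gs://'))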
Example #3
  def CatUrlStrings(self, url_strings, show_header=False, start_byte=0,
                    end_byte=None, cat_out_fd=None):
    """Prints each of the url strings to stdout.

    Args:
      url_strings: String iterable.
      show_header: If true, print a header per file.
      start_byte: Starting byte of the file to print, used for constructing
                  range requests.
      end_byte: Ending byte of the file to print; used for constructing range
                requests. If this is negative, the start_byte is ignored and
                an end range is sent over HTTP (such as Range: bytes=-9).
      cat_out_fd: File descriptor to which output should be written. Defaults
                  to stdout if no file descriptor is supplied.
    Returns:
      0 on success.

    Raises:
      CommandException if no URLs can be found.
    """
    printed_one = False
    # This should refer to whatever sys.stdout refers to when this method is
    # run, not when this method is defined, so we do the initialization here
    # rather than using sys.stdout as the cat_out_fd parameter's default value.
    if cat_out_fd is None:
      cat_out_fd = sys.stdout
    # We redirect sys.stdout so that everything other than the object
    # contents goes to stderr.
    old_stdout = sys.stdout
    sys.stdout = sys.stderr
    try:
      if url_strings and url_strings[0] in ('-', 'file://-'):
        self._WriteBytesBufferedFileToFile(sys.stdin, cat_out_fd)
      else:
        for url_str in url_strings:
          did_some_work = False
          # TODO: Get only the needed fields here.
          for blr in self.command_obj.WildcardIterator(url_str).IterObjects(
              bucket_listing_fields=_CAT_BUCKET_LISTING_FIELDS):
            decryption_keywrapper = None
            if (blr.root_object and
                blr.root_object.customerEncryption and
                blr.root_object.customerEncryption.keySha256):
              decryption_key = FindMatchingCSEKInBotoConfig(
                  blr.root_object.customerEncryption.keySha256, config)
              if not decryption_key:
                raise EncryptionException(
                    'Missing decryption key with SHA256 hash %s. No decryption '
                    'key matches object %s' % (
                        blr.root_object.customerEncryption.keySha256,
                        blr.url_string))
              decryption_keywrapper = CryptoKeyWrapperFromKey(decryption_key)

            did_some_work = True
            if show_header:
              if printed_one:
                print
              print '==> %s <==' % blr
              printed_one = True
            cat_object = blr.root_object
            storage_url = StorageUrlFromString(blr.url_string)
            if storage_url.IsCloudUrl():
              compressed_encoding = ObjectIsGzipEncoded(cat_object)
              self.command_obj.gsutil_api.GetObjectMedia(
                  cat_object.bucket, cat_object.name, cat_out_fd,
                  compressed_encoding=compressed_encoding,
                  start_byte=start_byte, end_byte=end_byte,
                  object_size=cat_object.size,
                  generation=storage_url.generation,
                  decryption_tuple=decryption_keywrapper,
                  provider=storage_url.scheme)
            else:
              with open(storage_url.object_name, 'rb') as f:
                self._WriteBytesBufferedFileToFile(f, cat_out_fd)
          if not did_some_work:
            raise CommandException(NO_URLS_MATCHED_TARGET % url_str)
    finally:
      sys.stdout = old_stdout

    return 0
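The essential trick above is the temporary redirection of sys.stdout to sys.stderr so that only object contents reach the real output stream, with the original stdout restored in a finally block. The same pattern in isolation (a sketch, independent of gsutil):

import sys


def write_payload_only(payload, out=None):
  """Writes payload to `out` while routing ordinary prints to stderr."""
  out = out or sys.stdout
  old_stdout = sys.stdout
  sys.stdout = sys.stderr  # Any print below lands on stderr, not in the output.
  try:
    print('writing %d bytes of payload' % len(payload))  # Goes to stderr.
    out.write(payload)
  finally:
    sys.stdout = old_stdout  # Always restore, even if writing fails.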
Example #4
  def _PatchIam(self):
    self.continue_on_error = False
    self.recursion_requested = False

    patch_bindings_tuples = []

    if self.sub_opts:
      for o, a in self.sub_opts:
        if o in ['-r', '-R']:
          self.recursion_requested = True
        elif o == '-f':
          self.continue_on_error = True
        elif o == '-d':
          patch_bindings_tuples.append(BindingStringToTuple(False, a))

    patterns = []

    # N.B.: self.sub_opts stops taking in options at the first non-flagged
    # token. The rest of the tokens are sent to self.args. Thus, in order to
    # handle input of the form "-d <binding> <binding> <url>", we will have to
    # parse self.args for a mix of both bindings and CloudUrls. We are not
    # expecting to come across the -r, -f flags here.
    it = iter(self.args)
    for token in it:
      if token == '-d':
        patch_bindings_tuples.append(
            BindingStringToTuple(False, it.next()))
      else:
        try:
          patch_bindings_tuples.append(
              BindingStringToTuple(True, token)
          )
        # All following arguments are urls.
        except (ArgumentException, CommandException):
          patterns.append(token)
          for token in it:
            patterns.append(token)

    # We must have some bindings to process, else this is pointless.
    if not patch_bindings_tuples:
      raise CommandException('Must specify at least one binding.')

    self.everything_set_okay = True
    threaded_wildcards = []
    for pattern in patterns:
      surl = StorageUrlFromString(pattern)
      try:
        if surl.IsBucket():
          if self.recursion_requested:
            surl.object = '*'
            threaded_wildcards.append(surl.url_string)
          else:
            self.PatchIamHelper(surl, patch_bindings_tuples)
        else:
          threaded_wildcards.append(surl.url_string)
      except AttributeError:
        error_msg = 'Invalid Cloud URL "%s".' % surl.object_name
        if set(surl.object_name).issubset(set('-Rrf')):
          error_msg += (
              ' This resource handle looks like a flag, which must appear '
              'before all bindings. See "gsutil help iam ch" for more details.'
          )
        raise CommandException(error_msg)

    if threaded_wildcards:
      name_expansion_iterator = NameExpansionIterator(
          self.command_name, self.debug,
          self.logger, self.gsutil_api,
          threaded_wildcards, self.recursion_requested,
          all_versions=self.all_versions,
          continue_on_error=self.continue_on_error or self.parallel_operations,
          bucket_listing_fields=['name'])

      seek_ahead_iterator = SeekAheadNameExpansionIterator(
          self.command_name, self.debug, self.GetSeekAheadGsutilApi(),
          threaded_wildcards, self.recursion_requested,
          all_versions=self.all_versions)

      # N.B.: Python2.6 support means we can't use a partial function here to
      # curry the bindings tuples into the wrapper function. We instead pass
      # the bindings along by zipping them with each name_expansion_iterator
      # result. See http://bugs.python.org/issue5228.
      serialized_bindings_tuples_it = itertools.repeat(
          [SerializeBindingsTuple(t) for t in patch_bindings_tuples])
      self.Apply(
          _PatchIamWrapper,
          itertools.izip(
              serialized_bindings_tuples_it, name_expansion_iterator),
          _PatchIamExceptionHandler,
          fail_on_error=not self.continue_on_error,
          seek_ahead_iterator=seek_ahead_iterator)

      self.everything_set_okay &= not GetFailureCount() > 0

    # TODO: Add an error counter for files and objects.
    if not self.everything_set_okay:
      raise CommandException('Some IAM policies could not be patched.')
Example #5
class TestHashingFileUploadWrapper(testcase.GsUtilUnitTestCase):
    """Unit tests for the HashingFileUploadWrapper class."""

    _temp_test_file = None
    _dummy_url = StorageUrlFromString('gs://bucket/object')

    def _GetTestFile(self):
        contents = pkgutil.get_data('gslib', 'tests/test_data/%s' % _TEST_FILE)
        if not self._temp_test_file:
            self._temp_test_file = self.CreateTempFile(file_name=_TEST_FILE,
                                                       contents=contents)
        return self._temp_test_file

    def testReadToEOF(self):
        digesters = {'md5': GetMd5()}
        tmp_file = self.CreateTempFile(contents=b'a' * TRANSFER_BUFFER_SIZE *
                                       4)
        with open(tmp_file, 'rb') as stream:
            wrapper = HashingFileUploadWrapper(stream, digesters,
                                               {'md5': GetMd5},
                                               self._dummy_url, self.logger)
            wrapper.read()
        with open(tmp_file, 'rb') as stream:
            actual = CalculateMd5FromContents(stream)
        self.assertEqual(actual, digesters['md5'].hexdigest())

    def _testSeekBack(self, initial_position, seek_back_amount):
        """Tests reading then seeking backwards.

    This function simulates an upload that is resumed after a connection break.
    It reads one transfer buffer at a time until it reaches initial_position,
    then seeks backwards (as if the server did not receive some of the bytes)
    and reads to the end of the file, ensuring the hash matches the original
    file upon completion.

    Args:
      initial_position: Initial number of bytes to read before seek.
      seek_back_amount: Number of bytes to seek backward.

    Raises:
      AssertionError on wrong amount of data remaining or hash mismatch.
    """
        tmp_file = self._GetTestFile()
        tmp_file_len = os.path.getsize(tmp_file)

        self.assertGreaterEqual(
            initial_position, seek_back_amount,
            'seek_back_amount must be less than initial position %s '
            '(but was actually: %s)' % (initial_position, seek_back_amount))
        self.assertLess(
            initial_position, tmp_file_len,
            'initial_position must be less than test file size %s '
            '(but was actually: %s)' % (tmp_file_len, initial_position))

        digesters = {'md5': GetMd5()}
        with open(tmp_file, 'rb') as stream:
            wrapper = HashingFileUploadWrapper(stream, digesters,
                                               {'md5': GetMd5},
                                               self._dummy_url, self.logger)
            position = 0
            while position < initial_position - TRANSFER_BUFFER_SIZE:
                data = wrapper.read(TRANSFER_BUFFER_SIZE)
                position += len(data)
            wrapper.read(initial_position - position)
            wrapper.seek(initial_position - seek_back_amount)
            self.assertEqual(wrapper.tell(),
                             initial_position - seek_back_amount)
            data = wrapper.read()
            self.assertEqual(
                len(data),
                tmp_file_len - (initial_position - seek_back_amount))
        with open(tmp_file, 'rb') as stream:
            actual = CalculateMd5FromContents(stream)
        self.assertEqual(actual, digesters['md5'].hexdigest())

    def testSeekToBeginning(self):
        for num_bytes in (TRANSFER_BUFFER_SIZE - 1, TRANSFER_BUFFER_SIZE,
                          TRANSFER_BUFFER_SIZE + 1, TRANSFER_BUFFER_SIZE * 2 -
                          1, TRANSFER_BUFFER_SIZE * 2,
                          TRANSFER_BUFFER_SIZE * 2 + 1,
                          TRANSFER_BUFFER_SIZE * 3 - 1, TRANSFER_BUFFER_SIZE *
                          3, TRANSFER_BUFFER_SIZE * 3 + 1):
            self._testSeekBack(num_bytes, num_bytes)

    def testSeekBackAroundOneBuffer(self):
        for initial_position in (TRANSFER_BUFFER_SIZE + 1,
                                 TRANSFER_BUFFER_SIZE * 2 - 1,
                                 TRANSFER_BUFFER_SIZE * 2,
                                 TRANSFER_BUFFER_SIZE * 2 + 1,
                                 TRANSFER_BUFFER_SIZE * 3 - 1,
                                 TRANSFER_BUFFER_SIZE * 3,
                                 TRANSFER_BUFFER_SIZE * 3 + 1):
            for seek_back_amount in (TRANSFER_BUFFER_SIZE - 1,
                                     TRANSFER_BUFFER_SIZE,
                                     TRANSFER_BUFFER_SIZE + 1):
                self._testSeekBack(initial_position, seek_back_amount)

    def testSeekBackMoreThanOneBuffer(self):
        for initial_position in (TRANSFER_BUFFER_SIZE * 2 + 1,
                                 TRANSFER_BUFFER_SIZE * 3 - 1,
                                 TRANSFER_BUFFER_SIZE * 3,
                                 TRANSFER_BUFFER_SIZE * 3 + 1):
            for seek_back_amount in (TRANSFER_BUFFER_SIZE * 2 - 1,
                                     TRANSFER_BUFFER_SIZE * 2,
                                     TRANSFER_BUFFER_SIZE * 2 + 1):
                self._testSeekBack(initial_position, seek_back_amount)

    def _testSeekForward(self, initial_seek):
        """Tests seeking to an initial position and then reading.

    This function simulates an upload that is resumed after a process break.
    It seeks from zero to the initial position (as if the server already had
    those bytes). Then it reads to the end of the file, ensuring the hash
    matches the original file upon completion.

    Args:
      initial_seek: Number of bytes to initially seek.

    Raises:
      AssertionError on wrong amount of data remaining or hash mismatch.
    """
        tmp_file = self._GetTestFile()
        tmp_file_len = os.path.getsize(tmp_file)

        self.assertLess(
            initial_seek, tmp_file_len,
            'initial_seek must be less than test file size %s '
            '(but was actually: %s)' % (tmp_file_len, initial_seek))

        digesters = {'md5': GetMd5()}
        with open(tmp_file, 'rb') as stream:
            wrapper = HashingFileUploadWrapper(stream, digesters,
                                               {'md5': GetMd5},
                                               self._dummy_url, self.logger)
            wrapper.seek(initial_seek)
            self.assertEqual(wrapper.tell(), initial_seek)
            data = wrapper.read()
            self.assertEqual(len(data), tmp_file_len - initial_seek)
        with open(tmp_file, 'rb') as stream:
            actual = CalculateMd5FromContents(stream)
        self.assertEqual(actual, digesters['md5'].hexdigest())

    def testSeekForward(self):
        for initial_seek in (0, TRANSFER_BUFFER_SIZE - 1, TRANSFER_BUFFER_SIZE,
                             TRANSFER_BUFFER_SIZE + 1,
                             TRANSFER_BUFFER_SIZE * 2 - 1,
                             TRANSFER_BUFFER_SIZE * 2,
                             TRANSFER_BUFFER_SIZE * 2 + 1):
            self._testSeekForward(initial_seek)

    def _testSeekAway(self, initial_read):
        """Tests reading to an initial position and then seeking to EOF and back.

    This function simulates a size check on the input file by seeking to the
    end of the file and then back to the current position. Then it reads to
    the end of the file, ensuring the hash matches the original file upon
    completion.

    Args:
      initial_read: Number of bytes to initially read.

    Raises:
      AssertionError on wrong amount of data remaining or hash mismatch.
    """
        tmp_file = self._GetTestFile()
        tmp_file_len = os.path.getsize(tmp_file)

        self.assertLess(
            initial_read, tmp_file_len,
            'initial_read must be less than test file size %s '
            '(but was actually: %s)' % (tmp_file_len, initial_read))

        digesters = {'md5': GetMd5()}
        with open(tmp_file, 'rb') as stream:
            wrapper = HashingFileUploadWrapper(stream, digesters,
                                               {'md5': GetMd5},
                                               self._dummy_url, self.logger)
            wrapper.read(initial_read)
            self.assertEqual(wrapper.tell(), initial_read)
            wrapper.seek(0, os.SEEK_END)
            self.assertEqual(wrapper.tell(), tmp_file_len)
            wrapper.seek(initial_read, os.SEEK_SET)
            data = wrapper.read()
            self.assertEqual(len(data), tmp_file_len - initial_read)
        with open(tmp_file, 'rb') as stream:
            actual = CalculateMd5FromContents(stream)
        self.assertEqual(actual, digesters['md5'].hexdigest())

    def testValidSeekAway(self):
        for initial_read in (0, TRANSFER_BUFFER_SIZE - 1, TRANSFER_BUFFER_SIZE,
                             TRANSFER_BUFFER_SIZE + 1,
                             TRANSFER_BUFFER_SIZE * 2 - 1,
                             TRANSFER_BUFFER_SIZE * 2,
                             TRANSFER_BUFFER_SIZE * 2 + 1):
            self._testSeekAway(initial_read)

    def testInvalidSeekAway(self):
        """Tests seeking to EOF and then reading without first doing a SEEK_SET."""
        tmp_file = self._GetTestFile()
        digesters = {'md5': GetMd5()}
        with open(tmp_file, 'rb') as stream:
            wrapper = HashingFileUploadWrapper(stream, digesters,
                                               {'md5': GetMd5},
                                               self._dummy_url, self.logger)
            wrapper.read(TRANSFER_BUFFER_SIZE)
            wrapper.seek(0, os.SEEK_END)
            try:
                wrapper.read()
                self.fail('Expected CommandException for invalid seek.')
            except CommandException as e:
                self.assertIn(
                    'Read called on hashing file pointer in an unknown position',
                    str(e))
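The property the tests above verify is that, no matter how reads and backward seeks are interleaved, the wrapper's digest ends up equal to the digest of a single sequential pass over the file. Below is a toy reader that maintains that property by re-hashing from the start on every backward seek (an illustration only; the real HashingFileUploadWrapper may use a different catch-up strategy):

import hashlib
import os


class SimpleHashingReader(object):
  """Wraps a binary file object and keeps an MD5 of the bytes read in order."""

  def __init__(self, fp, chunk_size=8192):
    self._fp = fp
    self._chunk_size = chunk_size
    self._digest = hashlib.md5()
    self._position = 0

  def read(self, size=-1):
    data = self._fp.read(size)
    self._digest.update(data)
    self._position += len(data)
    return data

  def tell(self):
    return self._position

  def seek(self, offset, whence=os.SEEK_SET):
    if whence != os.SEEK_SET or offset > self._position:
      raise ValueError('only backward absolute seeks are supported here')
    # Rewind and re-hash everything up to the new position so the digest
    # stays equivalent to one sequential read.
    self._fp.seek(0)
    self._digest = hashlib.md5()
    self._position = 0
    while self._position < offset:
      chunk = self._fp.read(min(self._chunk_size, offset - self._position))
      if not chunk:
        break
      self._digest.update(chunk)
      self._position += len(chunk)

  def hexdigest(self):
    return self._digest.hexdigest()

After reading to EOF (with any number of backward seeks along the way), hexdigest() matches hashlib.md5 over the whole file, which is the same invariant _testSeekBack asserts for the real wrapper.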
Example #6
  def _GetDefAcl(self):
    if not StorageUrlFromString(self.args[0]).IsBucket():
      raise CommandException('URL must name a bucket for the %s command' %
                             self.command_name)
    self.GetAndPrintAcl(self.args[0])
Example #7
    def test_FilterExistingComponentsVersioned(self):
        """Tests upload with versioned parallel components."""

        mock_api = MockCloudApi()
        bucket_name = self.MakeTempName('bucket')
        mock_api.MockCreateVersionedBucket(bucket_name)

        # dst_obj_metadata used for passing content-type.
        empty_object = apitools_messages.Object()

        tracker_file = self.CreateTempFile(file_name='foo', contents='asdf')
        tracker_file_lock = CreateLock()

        # Already uploaded, contents still match, component still used.
        fpath_uploaded_correctly = self.CreateTempFile(file_name='foo1',
                                                       contents='1')
        fpath_uploaded_correctly_url = StorageUrlFromString(
            str(fpath_uploaded_correctly))
        with open(fpath_uploaded_correctly) as f_in:
            fpath_uploaded_correctly_md5 = CalculateB64EncodedMd5FromContents(
                f_in)
        object_uploaded_correctly = mock_api.MockCreateObjectWithMetadata(
            apitools_messages.Object(bucket=bucket_name,
                                     name=fpath_uploaded_correctly,
                                     md5Hash=fpath_uploaded_correctly_md5),
            contents='1')
        object_uploaded_correctly_url = StorageUrlFromString(
            '%s://%s/%s#%s' %
            (self.default_provider, bucket_name, fpath_uploaded_correctly,
             object_uploaded_correctly.generation))
        args_uploaded_correctly = PerformParallelUploadFileToObjectArgs(
            fpath_uploaded_correctly, 0, 1, fpath_uploaded_correctly_url,
            object_uploaded_correctly_url,
            object_uploaded_correctly.generation, empty_object, tracker_file,
            tracker_file_lock)

        # Duplicate object name in tracker file, but uploaded correctly.
        fpath_duplicate = fpath_uploaded_correctly
        fpath_duplicate_url = StorageUrlFromString(str(fpath_duplicate))
        duplicate_uploaded_correctly = mock_api.MockCreateObjectWithMetadata(
            apitools_messages.Object(bucket=bucket_name,
                                     name=fpath_duplicate,
                                     md5Hash=fpath_uploaded_correctly_md5),
            contents='1')
        duplicate_uploaded_correctly_url = StorageUrlFromString(
            '%s://%s/%s#%s' %
            (self.default_provider, bucket_name, fpath_uploaded_correctly,
             duplicate_uploaded_correctly.generation))
        args_duplicate = PerformParallelUploadFileToObjectArgs(
            fpath_duplicate, 0, 1, fpath_duplicate_url,
            duplicate_uploaded_correctly_url,
            duplicate_uploaded_correctly.generation, empty_object,
            tracker_file, tracker_file_lock)

        # Already uploaded, but contents no longer match.
        fpath_wrong_contents = self.CreateTempFile(file_name='foo4',
                                                   contents='4')
        fpath_wrong_contents_url = StorageUrlFromString(
            str(fpath_wrong_contents))
        with open(self.CreateTempFile(contents='_')) as f_in:
            fpath_wrong_contents_md5 = CalculateB64EncodedMd5FromContents(f_in)
        object_wrong_contents = mock_api.MockCreateObjectWithMetadata(
            apitools_messages.Object(bucket=bucket_name,
                                     name=fpath_wrong_contents,
                                     md5Hash=fpath_wrong_contents_md5),
            contents='_')
        wrong_contents_url = StorageUrlFromString(
            '%s://%s/%s#%s' %
            (self.default_provider, bucket_name, fpath_wrong_contents,
             object_wrong_contents.generation))
        args_wrong_contents = PerformParallelUploadFileToObjectArgs(
            fpath_wrong_contents, 0, 1, fpath_wrong_contents_url,
            wrong_contents_url, '', empty_object, tracker_file,
            tracker_file_lock)

        dst_args = {
            fpath_uploaded_correctly: args_uploaded_correctly,
            fpath_wrong_contents: args_wrong_contents
        }

        existing_components = [
            ObjectFromTracker(fpath_uploaded_correctly,
                              object_uploaded_correctly_url.generation),
            ObjectFromTracker(fpath_duplicate,
                              duplicate_uploaded_correctly_url.generation),
            ObjectFromTracker(fpath_wrong_contents,
                              wrong_contents_url.generation)
        ]

        bucket_url = StorageUrlFromString('%s://%s' %
                                          (self.default_provider, bucket_name))

        (components_to_upload, uploaded_components,
         existing_objects_to_delete) = (FilterExistingComponents(
             dst_args, existing_components, bucket_url, mock_api))

        self.assertEqual([args_wrong_contents], components_to_upload)
        self.assertEqual(args_uploaded_correctly.dst_url.url_string,
                         uploaded_components[0].url_string)
        expected_to_delete = [(args_wrong_contents.dst_url.object_name,
                               args_wrong_contents.dst_url.generation),
                              (args_duplicate.dst_url.object_name,
                               args_duplicate.dst_url.generation)]
        for uri in existing_objects_to_delete:
            self.assertTrue((uri.object_name,
                             uri.generation) in expected_to_delete)
        self.assertEqual(len(expected_to_delete),
                         len(existing_objects_to_delete))
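The assertions above pin down the partition that FilterExistingComponents is expected to produce: components whose uploaded contents still match are reused, mismatched ones are re-uploaded, and stale or duplicate uploads are deleted. A rough sketch of that rule over simplified inputs (plain name-to-hash dicts rather than the real tracker and URL objects):

def partition_components(desired, existing):
  """Splits component names into (to_upload, reusable, to_delete).

  desired: {component_name: md5} describing what the final object needs.
  existing: {component_name: md5} describing previously uploaded components.
  """
  to_upload, reusable, to_delete = [], [], []
  for name, wanted_md5 in desired.items():
    uploaded_md5 = existing.get(name)
    if uploaded_md5 == wanted_md5:
      reusable.append(name)      # Contents still match; reuse the component.
    else:
      to_upload.append(name)     # Missing or stale; (re-)upload it.
      if uploaded_md5 is not None:
        to_delete.append(name)   # The stale upload must be cleaned up.
  return to_upload, reusable, to_delete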
Example #8
    def _Create(self):
        self.CheckArguments()

        # User-specified options
        pubsub_topic = None
        payload_format = None
        custom_attributes = {}
        event_types = []
        object_name_prefix = None
        should_setup_topic = True

        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-e':
                    event_types.append(a)
                elif o == '-f':
                    payload_format = a
                elif o == '-m':
                    if ':' not in a:
                        raise CommandException(
                            'Custom attributes specified with -m should be of the form '
                            'key:value')
                    key, value = a.split(':')
                    custom_attributes[key] = value
                elif o == '-p':
                    object_name_prefix = a
                elif o == '-s':
                    should_setup_topic = False
                elif o == '-t':
                    pubsub_topic = a

        if payload_format not in PAYLOAD_FORMAT_MAP:
            raise CommandException(
                "Must provide a payload format with -f of either 'json' or 'none'"
            )
        payload_format = PAYLOAD_FORMAT_MAP[payload_format]

        bucket_arg = self.args[-1]

        bucket_url = StorageUrlFromString(bucket_arg)
        if not bucket_url.IsCloudUrl() or not bucket_url.IsBucket():
            raise CommandException(
                "%s %s requires a GCS bucket name, but got '%s'" %
                (self.command_name, self.subcommand_name, bucket_arg))
        if bucket_url.scheme != 'gs':
            raise CommandException(
                'The %s command can only be used with gs:// bucket URLs.' %
                self.command_name)
        bucket_name = bucket_url.bucket_name
        self.logger.debug('Creating notification for bucket %s', bucket_url)

        # Find the project this bucket belongs to
        bucket_metadata = self.gsutil_api.GetBucket(bucket_name,
                                                    fields=['projectNumber'],
                                                    provider=bucket_url.scheme)
        bucket_project_number = bucket_metadata.projectNumber

        # If not specified, choose a sensible default for the Cloud Pub/Sub topic
        # name.
        if not pubsub_topic:
            pubsub_topic = 'projects/%s/topics/%s' % (PopulateProjectId(None),
                                                      bucket_name)
        if not pubsub_topic.startswith('projects/'):
            # If a user picks a topic ID (mytopic) but doesn't pass the whole name (
            # projects/my-project/topics/mytopic ), pick a default project.
            pubsub_topic = 'projects/%s/topics/%s' % (PopulateProjectId(None),
                                                      pubsub_topic)
        self.logger.debug('Using Cloud Pub/Sub topic %s', pubsub_topic)

        just_modified_topic_permissions = False
        if should_setup_topic:
            # Ask GCS for the email address that represents GCS's permission to
            # publish to a Cloud Pub/Sub topic from this project.
            service_account = self.gsutil_api.GetProjectServiceAccount(
                bucket_project_number,
                provider=bucket_url.scheme).email_address
            self.logger.debug('Service account for project %d: %s',
                              bucket_project_number, service_account)
            just_modified_topic_permissions = self._CreateTopic(
                pubsub_topic, service_account)

        for attempt_number in range(0, 2):
            try:
                create_response = self.gsutil_api.CreateNotificationConfig(
                    bucket_name,
                    pubsub_topic=pubsub_topic,
                    payload_format=payload_format,
                    custom_attributes=custom_attributes,
                    event_types=event_types if event_types else None,
                    object_name_prefix=object_name_prefix,
                    provider=bucket_url.scheme)
                break
            except PublishPermissionDeniedException:
                if attempt_number == 0 and just_modified_topic_permissions:
                    # If we have just set the IAM policy, it may take up to 10 seconds to
                    # take effect.
                    self.logger.info(
                        'Retrying create notification in 10 seconds '
                        '(new permissions may take up to 10 seconds to take effect.)'
                    )
                    time.sleep(10)
                else:
                    raise

        notification_name = 'projects/_/buckets/%s/notificationConfigs/%s' % (
            bucket_name, create_response.id)
        self.logger.info('Created notification config %s', notification_name)

        return 0
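The create loop above retries exactly once because an IAM grant made moments earlier (giving GCS permission to publish to the topic) can take several seconds to propagate. That retry shape in isolation (a sketch; the exception type and delay are placeholders):

import time


def call_with_one_delayed_retry(fn, retryable_exc, delay_seconds=10):
  """Calls fn(), retrying once after retryable_exc with a fixed delay."""
  for attempt in range(2):
    try:
      return fn()
    except retryable_exc:
      if attempt == 0:
        time.sleep(delay_seconds)  # Give the new permissions time to apply.
      else:
        raise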
Example #9
    def __iter__(self):
        """Iterates over src/dst URLs and produces a _DiffToApply sequence.

    Yields:
      The _DiffToApply.
    """
        # Strip trailing slashes, if any, so we compute tail length against a
        # consistent position regardless of whether trailing slashes were
        # included in the URL.
        base_src_url_len = len(self.base_src_url.url_string.rstrip('/\\'))
        base_dst_url_len = len(self.base_dst_url.url_string.rstrip('/\\'))
        src_url_str = dst_url_str = None
        # Invariant: After each yield, the URLs in src_url_str, dst_url_str,
        # self.sorted_src_urls_it, and self.sorted_dst_urls_it are not yet
        # processed. Each time we encounter None in src_url_str or dst_url_str we
        # populate from the respective iterator, and we reset one or the other value
        # to None after yielding an action that disposes of that URL.
        while not self.sorted_src_urls_it.IsEmpty() or src_url_str is not None:
            if src_url_str is None:
                (src_url_str, src_size, src_crc32c,
                 src_md5) = self._ParseTmpFileLine(
                     self.sorted_src_urls_it.next())
                # Skip past base URL and normalize slashes so we can compare across
                # clouds/file systems (including Windows).
                src_url_str_to_check = _EncodeUrl(
                    src_url_str[base_src_url_len:].replace('\\', '/'))
                dst_url_str_would_copy_to = copy_helper.ConstructDstUrl(
                    self.base_src_url, StorageUrlFromString(src_url_str), True,
                    True, self.base_dst_url, False,
                    self.recursion_requested).url_string
            if self.sorted_dst_urls_it.IsEmpty():
                # We've reached end of dst URLs, so copy src to dst.
                yield _DiffToApply(src_url_str, dst_url_str_would_copy_to,
                                   _DiffAction.COPY)
                src_url_str = None
                continue
            if not dst_url_str:
                (dst_url_str, dst_size, dst_crc32c,
                 dst_md5) = (self._ParseTmpFileLine(
                     self.sorted_dst_urls_it.next()))
                # Skip past base URL and normalize slashes so we can compare
                # across clouds/file systems (including Windows).
                dst_url_str_to_check = _EncodeUrl(
                    dst_url_str[base_dst_url_len:].replace('\\', '/'))

            if src_url_str_to_check < dst_url_str_to_check:
                # There's no dst object corresponding to src object, so copy src to dst.
                yield _DiffToApply(src_url_str, dst_url_str_would_copy_to,
                                   _DiffAction.COPY)
                src_url_str = None
            elif src_url_str_to_check > dst_url_str_to_check:
                # dst object without a corresponding src object, so remove dst if -d
                # option was specified.
                if self.delete_extras:
                    yield _DiffToApply(None, dst_url_str, _DiffAction.REMOVE)
                dst_url_str = None
            else:
                # There is a dst object corresponding to src object, so check if objects
                # match.
                if self._ObjectsMatch(src_url_str, src_size, src_crc32c,
                                      src_md5, dst_url_str, dst_size,
                                      dst_crc32c, dst_md5):
                    # Continue iterating without yielding a _DiffToApply.
                    pass
                else:
                    yield _DiffToApply(src_url_str, dst_url_str,
                                       _DiffAction.COPY)
                src_url_str = None
                dst_url_str = None

        # If -d option specified any files/objects left in dst iteration should be
        # removed.
        if not self.delete_extras:
            return
        if dst_url_str:
            yield _DiffToApply(None, dst_url_str, _DiffAction.REMOVE)
            dst_url_str = None
        for line in self.sorted_dst_urls_it:
            (dst_url_str, _, _, _) = self._ParseTmpFileLine(line)
            yield _DiffToApply(None, dst_url_str, _DiffAction.REMOVE)
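Structurally this iterator is a merge join over two sorted listings: names only on the source side become copies, names only on the destination side become removes (when -d was given), and names on both sides are compared by metadata. The same control flow over plain sorted name lists (an illustrative sketch that ignores sizes and checksums):

def diff_sorted(src_names, dst_names, delete_extras=False):
  """Returns ('copy', name) / ('remove', name) actions for sorted inputs."""
  actions = []
  i = j = 0
  while i < len(src_names):
    if j >= len(dst_names) or src_names[i] < dst_names[j]:
      actions.append(('copy', src_names[i]))      # Missing at destination.
      i += 1
    elif src_names[i] > dst_names[j]:
      if delete_extras:
        actions.append(('remove', dst_names[j]))  # Extra at destination.
      j += 1
    else:
      # Present on both sides; the real iterator compares size/CRC/MD5 here
      # to decide whether a copy is still needed.
      i += 1
      j += 1
  if delete_extras:
    actions.extend(('remove', name) for name in dst_names[j:])
  return actions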
Example #10
    def _PatchIam(self):
        self.continue_on_error = False
        self.recursion_requested = False

        patch_bindings_tuples = []

        if self.sub_opts:
            for o, a in self.sub_opts:
                if o in ['-r', '-R']:
                    self.recursion_requested = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-d':
                    patch_bindings_tuples.append(BindingStringToTuple(
                        False, a))

        patterns = []

        # N.B.: self.sub_opts stops taking in options at the first non-flagged
        # token. The rest of the tokens are sent to self.args. Thus, in order to
        # handle input of the form "-d <binding> <binding> <url>", we will have to
        # parse self.args for a mix of both bindings and CloudUrls. We are not
        # expecting to come across the -r, -f flags here.
        it = iter(self.args)
        for token in it:
            if STORAGE_URI_REGEX.match(token):
                patterns.append(token)
                break
            if token == '-d':
                patch_bindings_tuples.append(
                    BindingStringToTuple(False, next(it)))
            else:
                patch_bindings_tuples.append(BindingStringToTuple(True, token))
        if not patch_bindings_tuples:
            raise CommandException('Must specify at least one binding.')

        # All following arguments are urls.
        for token in it:
            patterns.append(token)

        self.everything_set_okay = True
        self.tried_ch_on_resource_with_conditions = False
        threaded_wildcards = []
        for pattern in patterns:
            surl = StorageUrlFromString(pattern)
            try:
                if surl.IsBucket():
                    if self.recursion_requested:
                        surl.object = '*'
                        threaded_wildcards.append(surl.url_string)
                    else:
                        self.PatchIamHelper(surl, patch_bindings_tuples)
                else:
                    threaded_wildcards.append(surl.url_string)
            except AttributeError:
                error_msg = 'Invalid Cloud URL "%s".' % surl.object_name
                if set(surl.object_name).issubset(set('-Rrf')):
                    error_msg += (
                        ' This resource handle looks like a flag, which must appear '
                        'before all bindings. See "gsutil help iam ch" for more details.'
                    )
                raise CommandException(error_msg)

        if threaded_wildcards:
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                threaded_wildcards,
                self.recursion_requested,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations,
                bucket_listing_fields=['name'])

            seek_ahead_iterator = SeekAheadNameExpansionIterator(
                self.command_name,
                self.debug,
                self.GetSeekAheadGsutilApi(),
                threaded_wildcards,
                self.recursion_requested,
                all_versions=self.all_versions)

            serialized_bindings_tuples_it = itertools.repeat(
                [SerializeBindingsTuple(t) for t in patch_bindings_tuples])
            self.Apply(_PatchIamWrapper,
                       zip(serialized_bindings_tuples_it,
                           name_expansion_iterator),
                       _PatchIamExceptionHandler,
                       fail_on_error=not self.continue_on_error,
                       seek_ahead_iterator=seek_ahead_iterator)

            self.everything_set_okay &= not GetFailureCount() > 0

        # TODO: Add an error counter for files and objects.
        if not self.everything_set_okay:
            msg = 'Some IAM policies could not be patched.'
            if self.tried_ch_on_resource_with_conditions:
                msg += '\n'
                msg += '\n'.join(
                    textwrap.wrap(
                        'Some resources had conditions present in their IAM policy '
                        'bindings, which is not supported by "iam ch". %s' %
                        (IAM_CH_CONDITIONS_WORKAROUND_MSG)))
            raise CommandException(msg)
Example #11
  def setUp(self):
    super(TestAcl, self).setUp()
    self.sample_uri = self.CreateBucket()
    self.sample_url = StorageUrlFromString(str(self.sample_uri))
    self.logger = CreateGsutilLogger('acl')
Example #12
    def RunCommand(self):
        """Command entry point for the hash command."""
        (calc_crc32c, calc_md5, format_func, cloud_format_func,
         output_format) = (self._ParseOpts(self.sub_opts, self.logger))

        matched_one = False
        for url_str in self.args:
            for file_ref in self.WildcardIterator(url_str).IterObjects(
                    bucket_listing_fields=[
                        'crc32c',
                        'customerEncryption',
                        'md5Hash',
                        'size',
                    ]):
                matched_one = True
                url = StorageUrlFromString(url_str)
                file_name = file_ref.storage_url.object_name
                if StorageUrlFromString(url_str).IsFileUrl():
                    file_size = os.path.getsize(file_name)
                    self.gsutil_api.status_queue.put(
                        FileMessage(url,
                                    None,
                                    time.time(),
                                    size=file_size,
                                    finished=False,
                                    message_type=FileMessage.FILE_HASH))
                    callback_processor = ProgressCallbackWithTimeout(
                        file_size,
                        FileProgressCallbackHandler(
                            self.gsutil_api.status_queue,
                            src_url=StorageUrlFromString(url_str),
                            operation_name='Hashing').call)
                    hash_dict = self._GetHashClassesFromArgs(
                        calc_crc32c, calc_md5)
                    with open(file_name, 'rb') as fp:
                        hashing_helper.CalculateHashesFromContents(
                            fp,
                            hash_dict,
                            callback_processor=callback_processor)
                    self.gsutil_api.status_queue.put(
                        FileMessage(url,
                                    None,
                                    time.time(),
                                    size=file_size,
                                    finished=True,
                                    message_type=FileMessage.FILE_HASH))
                else:
                    hash_dict = {}
                    obj_metadata = file_ref.root_object
                    file_size = obj_metadata.size
                    md5_present = obj_metadata.md5Hash is not None
                    crc32c_present = obj_metadata.crc32c is not None
                    if not md5_present and not crc32c_present:
                        logging.getLogger().warn('No hashes present for %s',
                                                 url_str)
                        continue
                    if md5_present:
                        hash_dict['md5'] = obj_metadata.md5Hash
                    if crc32c_present:
                        hash_dict['crc32c'] = obj_metadata.crc32c
                print('Hashes [%s] for %s:' % (output_format, file_name))
                for name, digest in six.iteritems(hash_dict):
                    print('\tHash (%s):\t\t%s' %
                          (name, (format_func(digest) if url.IsFileUrl() else
                                  cloud_format_func(digest))))

        if not matched_one:
            raise CommandException('No files matched')
        _PutToQueueWithTimeout(self.gsutil_api.status_queue,
                               FinalMessage(time.time()))
        return 0
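For local files the command streams contents through one or more digesters, while for cloud objects it just reads the md5Hash/crc32c fields from metadata. A minimal stand-in for the local branch using only hashlib (hashing_helper.CalculateHashesFromContents presumably does something similar, plus progress callbacks):

import hashlib


def calculate_hashes(path, algorithms=('md5',), buffer_size=8192):
  """Returns {algorithm: hex_digest} for a local file, read in chunks.

  Note: crc32c is not available in hashlib; gsutil relies on a separate
  library (crcmod/google-crc32c) for that digest.
  """
  digesters = {name: hashlib.new(name) for name in algorithms}
  with open(path, 'rb') as fp:
    chunk = fp.read(buffer_size)
    while chunk:
      for digester in digesters.values():
        digester.update(chunk)
      chunk = fp.read(buffer_size)
  return {name: digester.hexdigest()
          for name, digester in digesters.items()}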
Example #13
    def RunCommand(self):
        """Command entry point for the rm command."""
        # self.recursion_requested is initialized in command.py (so it can be
        # checked in parent class for all commands).
        self.continue_on_error = self.parallel_operations
        self.read_args_from_stdin = False
        self.all_versions = False
        if self.sub_opts:
            for o, unused_a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-I':
                    self.read_args_from_stdin = True
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True
                    self.all_versions = True

        if self.read_args_from_stdin:
            if self.args:
                raise CommandException(
                    'No arguments allowed with the -I flag.')
            url_strs = StdinIterator()
        else:
            if not self.args:
                raise CommandException(
                    'The rm command (without -I) expects at '
                    'least one URL.')
            url_strs = self.args

        # Tracks number of object deletes that failed.
        self.op_failure_count = 0

        # Tracks if any buckets were missing.
        self.bucket_not_found_count = 0

        # Tracks buckets that are slated for recursive deletion.
        bucket_urls_to_delete = []
        self.bucket_strings_to_delete = []

        if self.recursion_requested:
            bucket_fields = ['id']
            for url_str in url_strs:
                url = StorageUrlFromString(url_str)
                if url.IsBucket() or url.IsProvider():
                    for blr in self.WildcardIterator(url_str).IterBuckets(
                            bucket_fields=bucket_fields):
                        bucket_urls_to_delete.append(blr.storage_url)
                        self.bucket_strings_to_delete.append(url_str)

        self.preconditions = PreconditionsFromHeaders(self.headers or {})

        try:
            # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                url_strs,
                self.recursion_requested,
                project_id=self.project_id,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations)

            # Perform remove requests in parallel (-m) mode, if requested, using
            # configured number of parallel processes and threads. Otherwise,
            # perform requests with sequential function calls in current process.
            self.Apply(
                _RemoveFuncWrapper,
                name_expansion_iterator,
                _RemoveExceptionHandler,
                fail_on_error=(not self.continue_on_error),
                shared_attrs=['op_failure_count', 'bucket_not_found_count'])

        # Assuming the bucket has versioning enabled, URLs that don't map to
        # objects should throw an error even with all_versions, since the prior
        # round of deletes only sends objects to a history table.
        # This assumption that rm -a is only called for versioned buckets should
        # be corrected, but the fix is non-trivial.
        except CommandException as e:
            # Don't raise if there are buckets to delete -- it's valid to say:
            #   gsutil rm -r gs://some_bucket
            # if the bucket is empty.
            if _ExceptionMatchesBucketToDelete(self.bucket_strings_to_delete,
                                               e):
                DecrementFailureCount()
            else:
                raise
        except ServiceException as e:
            if not self.continue_on_error:
                raise
Example #14
class RmCommand(Command):
    """Implementation of gsutil rm command."""

    # Command specification. See base class for documentation.
    command_spec = Command.CreateCommandSpec(
        'rm',
        command_name_aliases=['del', 'delete', 'remove'],
        usage_synopsis=_SYNOPSIS,
        min_args=0,
        max_args=NO_MAX,
        supported_sub_args='afIrR',
        file_url_ok=False,
        provider_url_ok=False,
        urls_start_arg=0,
        gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
        gs_default_api=ApiSelector.JSON,
        argparse_arguments=[CommandArgument.MakeZeroOrMoreCloudURLsArgument()])
    # Help specification. See help_provider.py for documentation.
    help_spec = Command.HelpSpec(
        help_name='rm',
        help_name_aliases=['del', 'delete', 'remove'],
        help_type='command_help',
        help_one_line_summary='Remove objects',
        help_text=_DETAILED_HELP_TEXT,
        subcommand_help_text={},
    )

    def RunCommand(self):
        """Command entry point for the rm command."""
        # self.recursion_requested is initialized in command.py (so it can be
        # checked in parent class for all commands).
        self.continue_on_error = self.parallel_operations
        self.read_args_from_stdin = False
        self.all_versions = False
        if self.sub_opts:
            for o, unused_a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-I':
                    self.read_args_from_stdin = True
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True
                    self.all_versions = True

        if self.read_args_from_stdin:
            if self.args:
                raise CommandException(
                    'No arguments allowed with the -I flag.')
            url_strs = StdinIterator()
        else:
            if not self.args:
                raise CommandException(
                    'The rm command (without -I) expects at '
                    'least one URL.')
            url_strs = self.args

        # Tracks number of object deletes that failed.
        self.op_failure_count = 0

        # Tracks if any buckets were missing.
        self.bucket_not_found_count = 0

        # Tracks buckets that are slated for recursive deletion.
        bucket_urls_to_delete = []
        self.bucket_strings_to_delete = []

        if self.recursion_requested:
            bucket_fields = ['id']
            for url_str in url_strs:
                url = StorageUrlFromString(url_str)
                if url.IsBucket() or url.IsProvider():
                    for blr in self.WildcardIterator(url_str).IterBuckets(
                            bucket_fields=bucket_fields):
                        bucket_urls_to_delete.append(blr.storage_url)
                        self.bucket_strings_to_delete.append(url_str)

        self.preconditions = PreconditionsFromHeaders(self.headers or {})

        try:
            # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                url_strs,
                self.recursion_requested,
                project_id=self.project_id,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations)

            # Perform remove requests in parallel (-m) mode, if requested, using
            # configured number of parallel processes and threads. Otherwise,
            # perform requests with sequential function calls in current process.
            self.Apply(
                _RemoveFuncWrapper,
                name_expansion_iterator,
                _RemoveExceptionHandler,
                fail_on_error=(not self.continue_on_error),
                shared_attrs=['op_failure_count', 'bucket_not_found_count'])

        # Assuming the bucket has versioning enabled, URLs that don't map to
        # objects should throw an error even with all_versions, since the prior
        # round of deletes only sends objects to a history table.
        # This assumption that rm -a is only called for versioned buckets should
        # be corrected, but the fix is non-trivial.
        except CommandException as e:
            # Don't raise if there are buckets to delete -- it's valid to say:
            #   gsutil rm -r gs://some_bucket
            # if the bucket is empty.
            if _ExceptionMatchesBucketToDelete(self.bucket_strings_to_delete,
                                               e):
                DecrementFailureCount()
            else:
                raise
        except ServiceException as e:
            if not self.continue_on_error:
                raise

        if self.bucket_not_found_count:
            raise CommandException(
                'Encountered non-existent bucket during listing')

        if self.op_failure_count and not self.continue_on_error:
            raise CommandException('Some files could not be removed.')

        # If this was a gsutil rm -r command covering any bucket subdirs,
        # remove any dir_$folder$ objects (which are created by various web UI
        # tools to simulate folders).
        if self.recursion_requested:
            folder_object_wildcards = []
            for url_str in url_strs:
                url = StorageUrlFromString(url_str)
                if url.IsObject():
                    folder_object_wildcards.append('%s**_$folder$' % url_str)
            if folder_object_wildcards:
                self.continue_on_error = True
                try:
                    name_expansion_iterator = NameExpansionIterator(
                        self.command_name,
                        self.debug,
                        self.logger,
                        self.gsutil_api,
                        folder_object_wildcards,
                        self.recursion_requested,
                        project_id=self.project_id,
                        all_versions=self.all_versions)
                    # When we're removing folder objects, always continue on error
                    self.Apply(_RemoveFuncWrapper,
                               name_expansion_iterator,
                               _RemoveFoldersExceptionHandler,
                               fail_on_error=False)
                except CommandException as e:
                    # Ignore exception from name expansion due to an absent folder file.
                    if not e.reason.startswith(NO_URLS_MATCHED_GENERIC):
                        raise

        # Now that all data has been deleted, delete any bucket URLs.
        for url in bucket_urls_to_delete:
            self.logger.info('Removing %s...', url)

            @Retry(NotEmptyException, tries=3, timeout_secs=1)
            def BucketDeleteWithRetry():
                self.gsutil_api.DeleteBucket(url.bucket_name,
                                             provider=url.scheme)

            BucketDeleteWithRetry()

        if self.op_failure_count:
            plural_str = 's' if self.op_failure_count > 1 else ''
            raise CommandException(
                '%d file%s/object%s could not be removed.' %
                (self.op_failure_count, plural_str, plural_str))

        return 0
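
The bucket deletion above leans on gsutil's @Retry decorator to absorb transient NotEmptyException errors while object deletes finish propagating. A minimal sketch of that retry-decorator pattern built only on the standard library (RetryOnException, NotYetEmptyError, and delay_secs are illustrative names, not gsutil's helpers):

import functools
import time


class NotYetEmptyError(Exception):
  """Stand-in for the NotEmptyException raised while deletes propagate."""


def RetryOnException(exc_type, tries=3, delay_secs=1):
  """Returns a decorator retrying the wrapped call when exc_type is raised."""
  def Decorator(func):
    @functools.wraps(func)
    def Wrapper(*args, **kwargs):
      for attempt in range(tries):
        try:
          return func(*args, **kwargs)
        except exc_type:
          if attempt == tries - 1:
            raise  # Out of attempts; let the caller see the failure.
          time.sleep(delay_secs)
    return Wrapper
  return Decorator


@RetryOnException(NotYetEmptyError, tries=3, delay_secs=1)
def _DeleteBucketOnceEmpty():
  pass  # Would call something like gsutil_api.DeleteBucket(...) here.
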
Example #15
  def RunCommand(self):
    """Command entry point for the rm command."""
    # self.recursion_requested is initialized in command.py (so it can be
    # checked in parent class for all commands).
    self.continue_on_error = self.parallel_operations
    self.read_args_from_stdin = False
    self.all_versions = False
    if self.sub_opts:
      for o, unused_a in self.sub_opts:
        if o == '-a':
          self.all_versions = True
        elif o == '-f':
          self.continue_on_error = True
        elif o == '-I':
          self.read_args_from_stdin = True
        elif o == '-r' or o == '-R':
          self.recursion_requested = True
          self.all_versions = True

    if self.read_args_from_stdin:
      if self.args:
        raise CommandException('No arguments allowed with the -I flag.')
      url_strs = StdinIterator()
    else:
      if not self.args:
        raise CommandException('The rm command (without -I) expects at '
                               'least one URL.')
      url_strs = self.args

    # Tracks number of object deletes that failed.
    self.op_failure_count = 0

    # Tracks if any buckets were missing.
    self.bucket_not_found_count = 0

    # Tracks buckets that are slated for recursive deletion.
    bucket_urls_to_delete = []
    self.bucket_strings_to_delete = []

    if self.recursion_requested:
      bucket_fields = ['id']
      for url_str in url_strs:
        url = StorageUrlFromString(url_str)
        if url.IsBucket() or url.IsProvider():
          for blr in self.WildcardIterator(url_str).IterBuckets(
              bucket_fields=bucket_fields):
            bucket_urls_to_delete.append(blr.storage_url)
            self.bucket_strings_to_delete.append(url_str)

    self.preconditions = PreconditionsFromHeaders(self.headers or {})

    try:
      # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
      name_expansion_iterator = NameExpansionIterator(
          self.command_name,
          self.debug,
          self.logger,
          self.gsutil_api,
          url_strs,
          self.recursion_requested,
          project_id=self.project_id,
          all_versions=self.all_versions,
          continue_on_error=self.continue_on_error or self.parallel_operations)

      seek_ahead_iterator = None
      # Cannot seek ahead with stdin args, since we can only iterate them
      # once without buffering in memory.
      if not self.read_args_from_stdin:
        seek_ahead_iterator = SeekAheadNameExpansionIterator(
            self.command_name,
            self.debug,
            self.GetSeekAheadGsutilApi(),
            url_strs,
            self.recursion_requested,
            all_versions=self.all_versions,
            project_id=self.project_id)

      # Perform remove requests in parallel (-m) mode, if requested, using
      # configured number of parallel processes and threads. Otherwise,
      # perform requests with sequential function calls in current process.
      self.Apply(_RemoveFuncWrapper,
                 name_expansion_iterator,
                 _RemoveExceptionHandler,
                 fail_on_error=(not self.continue_on_error),
                 shared_attrs=['op_failure_count', 'bucket_not_found_count'],
                 seek_ahead_iterator=seek_ahead_iterator)

    # Assuming the bucket has versioning enabled, URLs that don't map to
    # objects should throw an error even with all_versions, since the prior
    # round of deletes only sends objects to a history table.
    # This assumption that rm -a is only called for versioned buckets should be
    # corrected, but the fix is non-trivial.
    except CommandException as e:
      # Don't raise if there are buckets to delete -- it's valid to say:
      #   gsutil rm -r gs://some_bucket
      # if the bucket is empty.
      if _ExceptionMatchesBucketToDelete(self.bucket_strings_to_delete, e):
        DecrementFailureCount()
      else:
        raise
    except ServiceException as e:
      if not self.continue_on_error:
        raise

    if self.bucket_not_found_count:
      raise CommandException('Encountered non-existent bucket during listing')

    if self.op_failure_count and not self.continue_on_error:
      raise CommandException('Some files could not be removed.')

    # If this was a gsutil rm -r command covering any bucket subdirs,
    # remove any dir_$folder$ objects (which are created by various web UI
    # tools to simulate folders).
    if self.recursion_requested:
      folder_object_wildcards = []
      for url_str in url_strs:
        url = StorageUrlFromString(url_str)
        if url.IsObject():
          folder_object_wildcards.append('%s**_$folder$' % url_str)
      if folder_object_wildcards:
        self.continue_on_error = True
        try:
          name_expansion_iterator = NameExpansionIterator(
              self.command_name,
              self.debug,
              self.logger,
              self.gsutil_api,
              folder_object_wildcards,
              self.recursion_requested,
              project_id=self.project_id,
              all_versions=self.all_versions)
          # When we're removing folder objects, always continue on error
          self.Apply(_RemoveFuncWrapper,
                     name_expansion_iterator,
                     _RemoveFoldersExceptionHandler,
                     fail_on_error=False)
        except CommandException as e:
          # Ignore exception from name expansion due to an absent folder file.
          if not e.reason.startswith(NO_URLS_MATCHED_PREFIX):
            raise

    # Now that all data has been deleted, delete any bucket URLs.
    for url in bucket_urls_to_delete:
      self.logger.info('Removing %s...', url)

      @Retry(NotEmptyException, tries=3, timeout_secs=1)
      def BucketDeleteWithRetry():
        self.gsutil_api.DeleteBucket(url.bucket_name, provider=url.scheme)

      BucketDeleteWithRetry()

    if self.op_failure_count:
      plural_str = 's' if self.op_failure_count > 1 else ''
      raise CommandException('%d file%s/object%s could not be removed.' %
                             (self.op_failure_count, plural_str, plural_str))

    return 0
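
With -I, the command pulls its URL arguments from standard input through StdinIterator instead of from the argument list. A rough stand-in for that generator, assuming one URL per non-blank line (IterUrlStringsFromStdin is an illustrative name, not gsutil's implementation):

import sys


def IterUrlStringsFromStdin():
  """Yields one URL string per non-blank line read from standard input."""
  for line in sys.stdin:
    line = line.strip()
    if line:
      yield line

Example #16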
  def RunCommand(self):
    """Command entry point for the du command."""
    self.line_ending = '\n'
    self.all_versions = False
    self.produce_total = False
    self.human_readable = False
    self.summary_only = False
    self.exclude_patterns = []
    if self.sub_opts:
      for o, a in self.sub_opts:
        if o == '-0':
          self.line_ending = '\0'
        elif o == '-a':
          self.all_versions = True
        elif o == '-c':
          self.produce_total = True
        elif o == '-e':
          self.exclude_patterns.append(a)
        elif o == '-h':
          self.human_readable = True
        elif o == '-s':
          self.summary_only = True
        elif o == '-X':
          if a == '-':
            f = sys.stdin
            f_close = False
          else:
            f = open(a, 'r') if six.PY2 else open(a, 'r', encoding=UTF8)
            f_close = True
          self.exclude_patterns = [six.ensure_text(line.strip()) for line in f]
          if f_close:
            f.close()

    if not self.args:
      # Default to listing all gs buckets.
      self.args = ['gs://']

    total_bytes = 0
    got_nomatch_errors = False

    def _PrintObjectLong(blr):
      return self._PrintInfoAboutBucketListingRef(blr)

    def _PrintNothing(unused_blr=None):
      pass

    def _PrintDirectory(num_bytes, blr):
      if not self.summary_only:
        self._PrintSummaryLine(num_bytes, blr.url_string.encode(UTF8))

    for url_arg in self.args:
      top_level_storage_url = StorageUrlFromString(url_arg)
      if top_level_storage_url.IsFileUrl():
        raise CommandException('Only cloud URLs are supported for %s' %
                               self.command_name)
      bucket_listing_fields = ['size']

      listing_helper = ls_helper.LsHelper(
          self.WildcardIterator,
          self.logger,
          print_object_func=_PrintObjectLong,
          print_dir_func=_PrintNothing,
          print_dir_header_func=_PrintNothing,
          print_dir_summary_func=_PrintDirectory,
          print_newline_func=_PrintNothing,
          all_versions=self.all_versions,
          should_recurse=True,
          exclude_patterns=self.exclude_patterns,
          fields=bucket_listing_fields)

      # LsHelper expands to objects and prefixes, so perform a top-level
      # expansion first.
      if top_level_storage_url.IsProvider():
        # Provider URL: use bucket wildcard to iterate over all buckets.
        top_level_iter = self.WildcardIterator(
            '%s://*' %
            top_level_storage_url.scheme).IterBuckets(bucket_fields=['id'])
      elif top_level_storage_url.IsBucket():
        top_level_iter = self.WildcardIterator(
            '%s://%s' % (top_level_storage_url.scheme,
                         top_level_storage_url.bucket_name)).IterBuckets(
                             bucket_fields=['id'])
      else:
        top_level_iter = [BucketListingObject(top_level_storage_url)]

      for blr in top_level_iter:
        storage_url = blr.storage_url
        if storage_url.IsBucket() and self.summary_only:
          storage_url = StorageUrlFromString(
              storage_url.CreatePrefixUrl(wildcard_suffix='**'))
        _, exp_objs, exp_bytes = listing_helper.ExpandUrlAndPrint(storage_url)
        if (storage_url.IsObject() and exp_objs == 0 and
            ContainsWildcard(url_arg) and not self.exclude_patterns):
          got_nomatch_errors = True
        total_bytes += exp_bytes

        if self.summary_only:
          self._PrintSummaryLine(exp_bytes,
                                 blr.url_string.rstrip('/').encode(UTF8))

    if self.produce_total:
      self._PrintSummaryLine(total_bytes, 'total')

    if got_nomatch_errors:
      raise CommandException('One or more URLs matched no objects.')

    return 0
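
The -X option above reads exclude patterns either from a named file or, when its argument is '-', from standard input. A small sketch of that convention, assuming UTF-8 text (ReadExcludePatterns is an illustrative name):

import sys


def ReadExcludePatterns(path_or_dash):
  """Returns stripped pattern lines from a file, or from stdin when '-'."""
  if path_or_dash == '-':
    return [line.strip() for line in sys.stdin]
  with open(path_or_dash, 'r', encoding='utf-8') as f:
    return [line.strip() for line in f]
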
Example #17
  def RunCommand(self):
    """Command entry point for the setmeta command."""
    headers = []
    if self.sub_opts:
      for o, a in self.sub_opts:
        if o == '-h':
          if 'x-goog-acl' in a or 'x-amz-acl' in a:
            raise CommandException(
                'gsutil setmeta no longer allows canned ACLs. Use gsutil acl '
                'set ... to set canned ACLs.')
          headers.append(a)

    (metadata_minus, metadata_plus) = self._ParseMetadataHeaders(headers)

    self.metadata_change = metadata_plus
    for header in metadata_minus:
      self.metadata_change[header] = ''

    if len(self.args) == 1 and not self.recursion_requested:
      url = StorageUrlFromString(self.args[0])
      if not (url.IsCloudUrl() and url.IsObject()):
        raise CommandException('URL (%s) must name an object' % self.args[0])

    # Used to track if any objects' metadata failed to be set.
    self.everything_set_okay = True

    self.preconditions = PreconditionsFromHeaders(self.headers)

    name_expansion_iterator = NameExpansionIterator(
        self.command_name,
        self.debug,
        self.logger,
        self.gsutil_api,
        self.args,
        self.recursion_requested,
        all_versions=self.all_versions,
        continue_on_error=self.parallel_operations,
        bucket_listing_fields=['generation', 'metadata', 'metageneration'])

    seek_ahead_iterator = SeekAheadNameExpansionIterator(
        self.command_name,
        self.debug,
        self.GetSeekAheadGsutilApi(),
        self.args,
        self.recursion_requested,
        all_versions=self.all_versions,
        project_id=self.project_id)

    try:
      # Perform requests in parallel (-m) mode, if requested, using
      # configured number of parallel processes and threads. Otherwise,
      # perform requests with sequential function calls in current process.
      self.Apply(_SetMetadataFuncWrapper,
                 name_expansion_iterator,
                 _SetMetadataExceptionHandler,
                 fail_on_error=True,
                 seek_ahead_iterator=seek_ahead_iterator)
    except AccessDeniedException as e:
      if e.status == 403:
        self._WarnServiceAccounts()
      raise

    if not self.everything_set_okay:
      raise CommandException('Metadata for some objects could not be set.')

    return 0
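
Each -h value is split by _ParseMetadataHeaders into metadata to set and metadata to remove before being merged into self.metadata_change. A rough approximation of that split, treating 'Name:value' as a set and a bare 'Name' (or empty value) as a removal; gsutil's real parser also validates header names and rejects duplicates:

def SplitMetadataHeaders(headers):
  """Roughly splits 'Name:value' strings into (to_set, to_remove)."""
  to_set = {}
  to_remove = []
  for header in headers:
    name, _, value = header.partition(':')
    if value:
      to_set[name.strip()] = value.strip()
    else:
      to_remove.append(name.strip())
  return to_set, to_remove
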
Example #18
    def RunCommand(self):
        """Command entry point for the compose command."""
        target_url_str = self.args[-1]
        self.args = self.args[:-1]
        target_url = StorageUrlFromString(target_url_str)
        self.CheckProvider(target_url)
        if target_url.HasGeneration():
            raise CommandException(
                'A version-specific URL (%s) cannot be '
                'the destination for gsutil compose - abort.' % target_url)

        dst_obj_metadata = apitools_messages.Object(
            name=target_url.object_name, bucket=target_url.bucket_name)

        components = []
        # Remember the first source object so we can get its content type.
        first_src_url = None
        for src_url_str in self.args:
            if ContainsWildcard(src_url_str):
                src_url_iter = self.WildcardIterator(src_url_str).IterObjects()
            else:
                src_url_iter = [
                    BucketListingObject(StorageUrlFromString(src_url_str))
                ]
            for blr in src_url_iter:
                src_url = blr.storage_url
                self.CheckProvider(src_url)

                if src_url.bucket_name != target_url.bucket_name:
                    raise CommandException(
                        'GCS does not support inter-bucket composing.')

                if not first_src_url:
                    first_src_url = src_url
                src_obj_metadata = (apitools_messages.ComposeRequest.
                                    SourceObjectsValueListEntry(
                                        name=src_url.object_name))
                if src_url.HasGeneration():
                    src_obj_metadata.generation = src_url.generation
                components.append(src_obj_metadata)
                # Avoid expanding too many components, and sanity check each name
                # expansion result.
                if len(components) > MAX_COMPOSE_ARITY:
                    raise CommandException(
                        '"compose" called with too many component '
                        'objects. Limit is %d.' % MAX_COMPOSE_ARITY)

        if not components:
            raise CommandException(
                '"compose" requires at least 1 component object.')

        dst_obj_metadata.contentType = self.gsutil_api.GetObjectMetadata(
            first_src_url.bucket_name,
            first_src_url.object_name,
            provider=first_src_url.scheme,
            fields=['contentType']).contentType

        preconditions = PreconditionsFromHeaders(self.headers or {})

        self.logger.info('Composing %s from %d component object(s).',
                         target_url, len(components))
        self.gsutil_api.ComposeObject(
            components,
            dst_obj_metadata,
            preconditions=preconditions,
            provider=target_url.scheme,
            encryption_tuple=GetEncryptionKeyWrapper(config))
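
compose rejects requests with more than MAX_COMPOSE_ARITY component objects (32 for GCS). When more pieces exist, one common approach is to compose them in batches; a sketch of that batching step, with the 32 limit assumed from the service's documented cap (ChunkComponents is illustrative):

MAX_COMPOSE_ARITY = 32  # Assumed value; gsutil defines the real constant.


def ChunkComponents(component_names, arity=MAX_COMPOSE_ARITY):
  """Yields lists of at most `arity` component names per compose request."""
  for start in range(0, len(component_names), arity):
    yield component_names[start:start + arity]

Example #19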
    def test_FilterExistingComponentsNonVersioned(self):
        """Tests upload with a variety of component states."""
        mock_api = MockCloudApi()
        bucket_name = self.MakeTempName('bucket')
        tracker_file = self.CreateTempFile(file_name='foo', contents='asdf')
        tracker_file_lock = CreateLock()

        # dst_obj_metadata used for passing content-type.
        empty_object = apitools_messages.Object()

        # Already uploaded, contents still match, component still used.
        fpath_uploaded_correctly = self.CreateTempFile(file_name='foo1',
                                                       contents='1')
        fpath_uploaded_correctly_url = StorageUrlFromString(
            str(fpath_uploaded_correctly))
        object_uploaded_correctly_url = StorageUrlFromString(
            '%s://%s/%s' %
            (self.default_provider, bucket_name, fpath_uploaded_correctly))
        with open(fpath_uploaded_correctly) as f_in:
            fpath_uploaded_correctly_md5 = CalculateB64EncodedMd5FromContents(
                f_in)
        mock_api.MockCreateObjectWithMetadata(apitools_messages.Object(
            bucket=bucket_name,
            name=fpath_uploaded_correctly,
            md5Hash=fpath_uploaded_correctly_md5),
                                              contents='1')

        args_uploaded_correctly = PerformParallelUploadFileToObjectArgs(
            fpath_uploaded_correctly, 0, 1, fpath_uploaded_correctly_url,
            object_uploaded_correctly_url, '', empty_object, tracker_file,
            tracker_file_lock)

        # Not yet uploaded, but needed.
        fpath_not_uploaded = self.CreateTempFile(file_name='foo2',
                                                 contents='2')
        fpath_not_uploaded_url = StorageUrlFromString(str(fpath_not_uploaded))
        object_not_uploaded_url = StorageUrlFromString(
            '%s://%s/%s' %
            (self.default_provider, bucket_name, fpath_not_uploaded))
        args_not_uploaded = PerformParallelUploadFileToObjectArgs(
            fpath_not_uploaded, 0, 1, fpath_not_uploaded_url,
            object_not_uploaded_url, '', empty_object, tracker_file,
            tracker_file_lock)

        # Already uploaded, but contents no longer match. Even though the contents
        # differ, we don't delete this since the bucket is not versioned and it
        # will be overwritten anyway.
        fpath_wrong_contents = self.CreateTempFile(file_name='foo4',
                                                   contents='4')
        fpath_wrong_contents_url = StorageUrlFromString(
            str(fpath_wrong_contents))
        object_wrong_contents_url = StorageUrlFromString(
            '%s://%s/%s' %
            (self.default_provider, bucket_name, fpath_wrong_contents))
        with open(self.CreateTempFile(contents='_')) as f_in:
            fpath_wrong_contents_md5 = CalculateB64EncodedMd5FromContents(f_in)
        mock_api.MockCreateObjectWithMetadata(apitools_messages.Object(
            bucket=bucket_name,
            name=fpath_wrong_contents,
            md5Hash=fpath_wrong_contents_md5),
                                              contents='1')

        args_wrong_contents = PerformParallelUploadFileToObjectArgs(
            fpath_wrong_contents, 0, 1, fpath_wrong_contents_url,
            object_wrong_contents_url, '', empty_object, tracker_file,
            tracker_file_lock)

        # Exists in tracker file, but component object no longer exists.
        fpath_remote_deleted = self.CreateTempFile(file_name='foo5',
                                                   contents='5')
        fpath_remote_deleted_url = StorageUrlFromString(
            str(fpath_remote_deleted))
        args_remote_deleted = PerformParallelUploadFileToObjectArgs(
            fpath_remote_deleted, 0, 1, fpath_remote_deleted_url, '', '',
            empty_object, tracker_file, tracker_file_lock)

        # Exists in tracker file and already uploaded, but no longer needed.
        fpath_no_longer_used = self.CreateTempFile(file_name='foo6',
                                                   contents='6')
        with open(fpath_no_longer_used) as f_in:
            file_md5 = CalculateB64EncodedMd5FromContents(f_in)
        mock_api.MockCreateObjectWithMetadata(apitools_messages.Object(
            bucket=bucket_name, name='foo6', md5Hash=file_md5),
                                              contents='6')

        dst_args = {
            fpath_uploaded_correctly: args_uploaded_correctly,
            fpath_not_uploaded: args_not_uploaded,
            fpath_wrong_contents: args_wrong_contents,
            fpath_remote_deleted: args_remote_deleted
        }

        existing_components = [
            ObjectFromTracker(fpath_uploaded_correctly, ''),
            ObjectFromTracker(fpath_wrong_contents, ''),
            ObjectFromTracker(fpath_remote_deleted, ''),
            ObjectFromTracker(fpath_no_longer_used, '')
        ]

        bucket_url = StorageUrlFromString('%s://%s' %
                                          (self.default_provider, bucket_name))

        (components_to_upload, uploaded_components,
         existing_objects_to_delete) = (FilterExistingComponents(
             dst_args, existing_components, bucket_url, mock_api))

        for arg in [
                args_not_uploaded, args_wrong_contents, args_remote_deleted
        ]:
            self.assertTrue(arg in components_to_upload)
        self.assertEqual(1, len(uploaded_components))
        self.assertEqual(args_uploaded_correctly.dst_url.url_string,
                         uploaded_components[0].url_string)
        self.assertEqual(1, len(existing_objects_to_delete))
        no_longer_used_url = StorageUrlFromString(
            '%s://%s/%s' %
            (self.default_provider, bucket_name, fpath_no_longer_used))
        self.assertEqual(no_longer_used_url.url_string,
                         existing_objects_to_delete[0].url_string)
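
The test fingerprints each local component with CalculateB64EncodedMd5FromContents so FilterExistingComponents can tell whether an already-uploaded piece still matches its source file. A minimal stdlib sketch of that hash, assuming the file is opened in binary mode and read in chunks:

import base64
import hashlib


def B64EncodedMd5FromFile(file_obj, chunk_size=8192):
  """Returns the base64-encoded MD5 digest of a file opened in binary mode."""
  digest = hashlib.md5()
  for chunk in iter(lambda: file_obj.read(chunk_size), b''):
    digest.update(chunk)
  return base64.b64encode(digest.digest()).decode('ascii')
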
Example #20
    def __iter__(self,
                 bucket_listing_fields=None,
                 expand_top_level_buckets=False):
        """Iterator that gets called when iterating over the cloud wildcard.

    In the case where no wildcard is present, returns a single matching object,
    single matching prefix, or one of each if both exist.

    Args:
      bucket_listing_fields: Iterable fields to include in bucket listings.
                             Ex. ['name', 'acl'].  Iterator is
                             responsible for converting these to list-style
                             format ['items/name', 'items/acl'] as well as
                             adding any fields necessary for listing such as
                             prefixes.  API implementation is responsible for
                             adding pagination fields.  If this is None,
                             all fields are returned.
      expand_top_level_buckets: If true, yield no BUCKET references.  Instead,
                                expand buckets into top-level objects and
                                prefixes.

    Yields:
      BucketListingRef of type BUCKET, OBJECT or PREFIX.
    """
        single_version_request = self.wildcard_url.HasGeneration()

        # For wildcard expansion purposes, we need at a minimum the name of
        # each object and prefix.  If we're not using the default of requesting
        # all fields, make sure at least these are requested.  The Cloud API
        # tolerates specifying the same field twice.
        get_fields = None
        if bucket_listing_fields:
            get_fields = set()
            for field in bucket_listing_fields:
                get_fields.add(field)
            bucket_listing_fields = self._GetToListFields(
                get_fields=bucket_listing_fields)
            bucket_listing_fields.update(['items/name', 'prefixes'])
            get_fields.update(['name'])
            # If we're making versioned requests, ensure generation and
            # metageneration are also included.
            if single_version_request or self.all_versions:
                bucket_listing_fields.update(
                    ['items/generation', 'items/metageneration'])
                get_fields.update(['generation', 'metageneration'])

        # Handle bucket wildcarding, if any, in _ExpandBucketWildcards. Then
        # iterate over the expanded bucket strings and handle any object
        # wildcarding.
        for bucket_listing_ref in self._ExpandBucketWildcards(
                bucket_fields=['id']):
            bucket_url_string = bucket_listing_ref.url_string
            if self.wildcard_url.IsBucket():
                # IsBucket() guarantees there are no prefix or object wildcards, and
                # thus this is a top-level listing of buckets.
                if expand_top_level_buckets:
                    url = StorageUrlFromString(bucket_url_string)
                    for obj_or_prefix in self.gsutil_api.ListObjects(
                            url.bucket_name,
                            delimiter='/',
                            all_versions=self.all_versions,
                            provider=self.wildcard_url.scheme,
                            fields=bucket_listing_fields):
                        if obj_or_prefix.datatype == CloudApi.CsObjectOrPrefixType.OBJECT:
                            yield self._GetObjectRef(
                                bucket_url_string,
                                obj_or_prefix.data,
                                with_version=self.all_versions)
                        else:  # CloudApi.CsObjectOrPrefixType.PREFIX:
                            yield self._GetPrefixRef(bucket_url_string,
                                                     obj_or_prefix.data)
                else:
                    yield bucket_listing_ref
            else:
                # By default, assume a non-wildcarded URL is an object, not a prefix.
                # This prevents unnecessary listings (which are slower, more expensive,
                # and also subject to eventual consistency).
                if (not ContainsWildcard(self.wildcard_url.url_string)
                        and self.wildcard_url.IsObject()
                        and not self.all_versions):
                    try:
                        get_object = self.gsutil_api.GetObjectMetadata(
                            self.wildcard_url.bucket_name,
                            self.wildcard_url.object_name,
                            generation=self.wildcard_url.generation,
                            provider=self.wildcard_url.scheme,
                            fields=get_fields)
                        yield self._GetObjectRef(
                            self.wildcard_url.bucket_url_string,
                            get_object,
                            with_version=(self.all_versions
                                          or single_version_request))
                        return
                    except (NotFoundException, AccessDeniedException):
                        # It's possible this is a prefix - try to list instead.
                        pass

                # Expand iteratively by building prefix/delimiter bucket listing
                # request, filtering the results per the current level's wildcard
                # (if present), and continuing with the next component of the
                # wildcard. See _BuildBucketFilterStrings() documentation for details.
                if single_version_request:
                    url_string = '%s%s#%s' % (bucket_url_string,
                                              self.wildcard_url.object_name,
                                              self.wildcard_url.generation)
                else:
                    # Rstrip any prefixes to correspond with rstripped prefix wildcard
                    # from _BuildBucketFilterStrings().
                    url_string = '%s%s' % (
                        bucket_url_string,
                        StripOneSlash(self.wildcard_url.object_name) or '/'
                    )  # Cover root object named '/' case.
                urls_needing_expansion = [url_string]
                while urls_needing_expansion:
                    url = StorageUrlFromString(urls_needing_expansion.pop(0))
                    (prefix, delimiter, prefix_wildcard,
                     suffix_wildcard) = (self._BuildBucketFilterStrings(
                         url.object_name))
                    regex_patterns = self._GetRegexPatterns(prefix_wildcard)

                    # If we have a suffix wildcard, we only care about listing prefixes.
                    listing_fields = (set(['prefixes']) if suffix_wildcard else
                                      bucket_listing_fields)

                    # List bucket for objects matching prefix up to delimiter.
                    for obj_or_prefix in self.gsutil_api.ListObjects(
                            url.bucket_name,
                            prefix=prefix,
                            delimiter=delimiter,
                            all_versions=self.all_versions
                            or single_version_request,
                            provider=self.wildcard_url.scheme,
                            fields=listing_fields):
                        for pattern in regex_patterns:
                            if obj_or_prefix.datatype == CloudApi.CsObjectOrPrefixType.OBJECT:
                                gcs_object = obj_or_prefix.data
                                if pattern.match(gcs_object.name):
                                    if not suffix_wildcard or (
                                            StripOneSlash(gcs_object.name)
                                            == suffix_wildcard):
                                        if not single_version_request or (
                                                self._SingleVersionMatches(
                                                    gcs_object.generation)):
                                            yield self._GetObjectRef(
                                                bucket_url_string,
                                                gcs_object,
                                                with_version=(
                                                    self.all_versions
                                                    or single_version_request))
                                    break
                            else:  # CloudApi.CsObjectOrPrefixType.PREFIX
                                prefix = obj_or_prefix.data

                                if ContainsWildcard(prefix):
                                    # TODO: Disambiguate user-supplied strings from iterated
                                    # prefix and object names so that we can better reason
                                    # about wildcards and handle this case without raising
                                    # an error.
                                    raise CommandException(
                                        'Cloud folder %s%s contains a wildcard; gsutil does '
                                        'not currently support objects with wildcards in their '
                                        'name.' % (bucket_url_string, prefix))

                                # If the prefix ends with a slash, remove it.  Note that we only
                                # remove one slash so that we can successfully enumerate dirs
                                # containing multiple slashes.
                                rstripped_prefix = StripOneSlash(prefix)
                                if pattern.match(rstripped_prefix):
                                    if suffix_wildcard and rstripped_prefix != suffix_wildcard:
                                        # There's more wildcard left to expand.
                                        url_append_string = '%s%s' % (
                                            bucket_url_string, rstripped_prefix
                                            + '/' + suffix_wildcard)
                                        urls_needing_expansion.append(
                                            url_append_string)
                                    else:
                                        # No wildcard to expand, just yield the prefix.
                                        yield self._GetPrefixRef(
                                            bucket_url_string, prefix)
                                    break
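
The iterator above converts each wildcard component into prefix/delimiter listing parameters plus regex patterns, then filters the listed names client-side. A much-simplified sketch of just the filtering half, using fnmatch in place of gsutil's _GetRegexPatterns (note that fnmatch's '*' also crosses '/' boundaries, unlike the delimiter-bounded matching above):

import fnmatch


def FilterNamesByWildcard(names, wildcard):
  """Returns the listed object names matching a shell-style wildcard."""
  # Caution: fnmatch's '*' matches '/' as well, so this is looser than
  # the prefix/delimiter expansion performed by the real iterator.
  return [name for name in names if fnmatch.fnmatchcase(name, wildcard)]
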
Example #21
  def RunCommand(self):
    """Command entry point for the ls command."""
    got_nomatch_errors = False
    got_bucket_nomatch_errors = False
    listing_style = ListingStyle.SHORT
    get_bucket_info = False
    self.recursion_requested = False
    self.all_versions = False
    self.include_etag = False
    self.human_readable = False
    self.list_subdir_contents = True
    if self.sub_opts:
      for o, a in self.sub_opts:
        if o == '-a':
          self.all_versions = True
        elif o == '-e':
          self.include_etag = True
        elif o == '-b':
          get_bucket_info = True
        elif o == '-h':
          self.human_readable = True
        elif o == '-l':
          listing_style = ListingStyle.LONG
        elif o == '-L':
          listing_style = ListingStyle.LONG_LONG
        elif o == '-p':
          # Project IDs are sent as header values when using gs and s3 XML APIs.
          InsistAscii(a, 'Invalid non-ASCII character found in project ID')
          self.project_id = a
        elif o == '-r' or o == '-R':
          self.recursion_requested = True
        elif o == '-d':
          self.list_subdir_contents = False

    if not self.args:
      # default to listing all gs buckets
      self.args = ['gs://']

    total_objs = 0
    total_bytes = 0

    def MaybePrintBucketHeader(blr):
      if len(self.args) > 1:
        print '%s:' % blr.url_string.encode(UTF8)
    print_bucket_header = MaybePrintBucketHeader

    for url_str in self.args:
      storage_url = StorageUrlFromString(url_str)
      if storage_url.IsFileUrl():
        raise CommandException('Only cloud URLs are supported for %s'
                               % self.command_name)
      bucket_fields = None
      if (listing_style == ListingStyle.SHORT or
          listing_style == ListingStyle.LONG):
        bucket_fields = ['id']
      elif listing_style == ListingStyle.LONG_LONG:
        bucket_fields = ['acl',
                         'cors',
                         'defaultObjectAcl',
                         'labels',
                         'location',
                         'logging',
                         'lifecycle',
                         'metageneration',
                         'storageClass',
                         'timeCreated',
                         'updated',
                         'versioning',
                         'website']
      if storage_url.IsProvider():
        # Provider URL: use bucket wildcard to list buckets.
        for blr in self.WildcardIterator(
            '%s://*' % storage_url.scheme).IterBuckets(
                bucket_fields=bucket_fields):
          self._PrintBucketInfo(blr, listing_style)
      elif storage_url.IsBucket() and get_bucket_info:
        # ls -b bucket listing request: List info about bucket(s).
        total_buckets = 0
        for blr in self.WildcardIterator(url_str).IterBuckets(
            bucket_fields=bucket_fields):
          if not ContainsWildcard(url_str) and not blr.root_object:
            # Iterator does not make an HTTP call for non-wildcarded
            # listings with fields=='id'. Ensure the bucket exists by calling
            # GetBucket.
            self.gsutil_api.GetBucket(
                blr.storage_url.bucket_name,
                fields=['id'], provider=storage_url.scheme)
          self._PrintBucketInfo(blr, listing_style)
          total_buckets += 1
        if not ContainsWildcard(url_str) and not total_buckets:
          got_bucket_nomatch_errors = True
      else:
        # URL names a bucket, object, or object subdir ->
        # list matching object(s) / subdirs.
        def _PrintPrefixLong(blr):
          print '%-33s%s' % ('', blr.url_string.encode(UTF8))

        if listing_style == ListingStyle.SHORT:
          # ls helper by default readies us for a short listing.
          ls_helper = LsHelper(self.WildcardIterator, self.logger,
                               all_versions=self.all_versions,
                               print_bucket_header_func=print_bucket_header,
                               should_recurse=self.recursion_requested,
                               list_subdir_contents=self.list_subdir_contents)
        elif listing_style == ListingStyle.LONG:
          bucket_listing_fields = ['name', 'timeCreated', 'updated', 'size']
          if self.all_versions:
            bucket_listing_fields.extend(['generation', 'metageneration'])
          if self.include_etag:
            bucket_listing_fields.append('etag')

          ls_helper = LsHelper(self.WildcardIterator, self.logger,
                               print_object_func=self._PrintLongListing,
                               print_dir_func=_PrintPrefixLong,
                               print_bucket_header_func=print_bucket_header,
                               all_versions=self.all_versions,
                               should_recurse=self.recursion_requested,
                               fields=bucket_listing_fields,
                               list_subdir_contents=self.list_subdir_contents)

        elif listing_style == ListingStyle.LONG_LONG:
          # List all fields
          bucket_listing_fields = (UNENCRYPTED_FULL_LISTING_FIELDS +
                                   ENCRYPTED_FIELDS)
          ls_helper = LsHelper(self.WildcardIterator, self.logger,
                               print_object_func=PrintFullInfoAboutObject,
                               print_dir_func=_PrintPrefixLong,
                               print_bucket_header_func=print_bucket_header,
                               all_versions=self.all_versions,
                               should_recurse=self.recursion_requested,
                               fields=bucket_listing_fields,
                               list_subdir_contents=self.list_subdir_contents)
        else:
          raise CommandException('Unknown listing style: %s' % listing_style)

        exp_dirs, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(storage_url)
        if storage_url.IsObject() and exp_objs == 0 and exp_dirs == 0:
          got_nomatch_errors = True
        total_bytes += exp_bytes
        total_objs += exp_objs

    if total_objs and listing_style != ListingStyle.SHORT:
      print ('TOTAL: %d objects, %d bytes (%s)' %
             (total_objs, total_bytes, MakeHumanReadable(float(total_bytes))))
    if got_nomatch_errors:
      raise CommandException('One or more URLs matched no objects.')
    if got_bucket_nomatch_errors:
      raise NotFoundException('One or more bucket URLs matched no buckets.')

    return 0
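
The TOTAL line relies on MakeHumanReadable to render the byte count. A small sketch of that style of formatting, using 1024-based units as gsutil does; the exact rounding and suffix choices of the real helper may differ:

def HumanReadableBytes(num_bytes):
  """Formats a byte count with 1024-based units, e.g. 1536 -> '1.5 KiB'."""
  value = float(num_bytes)
  for unit in ('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB'):
    if value < 1024 or unit == 'PiB':
      return '%.1f %s' % (value, unit)
    value /= 1024.0
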
Example #22
    def __iter__(self, bucket_listing_fields=None):
        """Iterator that gets called when iterating over the file wildcard.

    In the case where no wildcard is present, returns a single matching file
    or directory.

    Args:
      bucket_listing_fields: Iterable fields to include in listings.
          Ex. ['size']. Currently only 'size' is supported.
          If present, will populate yielded BucketListingObject.root_object
          with the file name and size.

    Raises:
      WildcardException: if invalid wildcard found.

    Yields:
      BucketListingRef of type OBJECT (for files) or PREFIX (for directories)
    """
        include_size = (bucket_listing_fields
                        and 'size' in set(bucket_listing_fields))

        wildcard = self.wildcard_url.object_name
        match = FLAT_LIST_REGEX.match(wildcard)
        if match:
            # Recursive wildcarding request ('.../**/...').
            # Example input: wildcard = '/tmp/tmp2pQJAX/**/*'
            base_dir = match.group('before')[:-1]
            remaining_wildcard = match.group('after')
            # At this point for the above example base_dir = '/tmp/tmp2pQJAX' and
            # remaining_wildcard = '/*'
            if remaining_wildcard.startswith('*'):
                raise WildcardException(
                    'Invalid wildcard with more than 2 consecutive '
                    '*s (%s)' % wildcard)
            # If there was no remaining wildcard past the recursive wildcard,
            # treat it as if it were a '*'. For example, file://tmp/** is equivalent
            # to file://tmp/**/*
            if not remaining_wildcard:
                remaining_wildcard = '*'
            # Skip slash(es).
            remaining_wildcard = remaining_wildcard.lstrip(os.sep)
            filepaths = self._IterDir(base_dir, remaining_wildcard)
        else:
            # Not a recursive wildcarding request.
            filepaths = glob.iglob(wildcard)
        for filepath in filepaths:
            expanded_url = StorageUrlFromString(filepath)
            try:
                if self.ignore_symlinks and os.path.islink(filepath):
                    if self.logger:
                        self.logger.info('Skipping symbolic link %s...',
                                         filepath)
                    continue
                if os.path.isdir(filepath):
                    yield BucketListingPrefix(expanded_url)
                else:
                    blr_object = _GetFileObject(
                        filepath) if include_size else None
                    yield BucketListingObject(expanded_url,
                                              root_object=blr_object)
            except UnicodeEncodeError:
                raise CommandException('\n'.join(
                    textwrap.wrap(_UNICODE_EXCEPTION_TEXT % repr(filepath))))
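
The '**' branch walks base_dir recursively and then applies the remaining wildcard at each level via _IterDir. For simple cases the standard library expresses the same intent directly; a sketch using glob with recursive=True (Python 3.5+), shown only to illustrate the semantics, not gsutil's implementation:

import glob
import os


def IterRecursiveMatches(base_dir, remaining_wildcard='*'):
  """Yields file paths under base_dir matching the wildcard at any depth."""
  pattern = os.path.join(base_dir, '**', remaining_wildcard)
  for path in glob.iglob(pattern, recursive=True):
    yield path
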
Example #23
  def _SetIam(self):
    """Set IAM policy for given wildcards on the command line."""

    self.continue_on_error = False
    self.recursion_requested = False
    self.all_versions = False
    force_etag = False
    etag = ''
    if self.sub_opts:
      for o, arg in self.sub_opts:
        if o in ['-r', '-R']:
          self.recursion_requested = True
        elif o == '-f':
          self.continue_on_error = True
        elif o == '-a':
          self.all_versions = True
        elif o == '-e':
          etag = str(arg)
          force_etag = True
        else:
          self.RaiseInvalidArgumentException()

    file_url = self.args[0]
    patterns = self.args[1:]

    # Load the IAM policy file and raise error if the file is invalid JSON or
    # does not exist.
    try:
      with open(file_url, 'r') as fp:
        policy = json.loads(fp.read())
    except IOError:
      raise ArgumentException(
          'Specified IAM policy file "%s" does not exist.' % file_url)
    except ValueError:
      raise ArgumentException(
          'Invalid IAM policy file "%s".' % file_url)

    bindings = policy.get('bindings', [])
    if not force_etag:
      etag = policy.get('etag', '')

    policy_json = json.dumps({'bindings': bindings, 'etag': etag})
    try:
      policy = protojson.decode_message(apitools_messages.Policy, policy_json)
    except DecodeError:
      raise ArgumentException(
          'Invalid IAM policy file "%s" or etag "%s".' % (file_url, etag))

    self.everything_set_okay = True

    # This list of wildcard strings will be handled by NameExpansionIterator.
    threaded_wildcards = []

    for pattern in patterns:
      surl = StorageUrlFromString(pattern)
      if surl.IsBucket():
        if self.recursion_requested:
          surl.object_name = '*'
          threaded_wildcards.append(surl.url_string)
        else:
          self.SetIamHelper(surl, policy)
      else:
        threaded_wildcards.append(surl.url_string)

    # N.B.: If threaded_wildcards contains a non-existent bucket
    # (e.g. ["gs://non-existent", "gs://existent"]), NameExpansionIterator
    # will raise an exception in iter.next. This halts all iteration, even
    # when -f is set. This behavior is also evident in acl set. This behavior
    # also appears for any exception that will be raised when iterating over
    # wildcard expansions (access denied if bucket cannot be listed, etc.).
    if threaded_wildcards:
      name_expansion_iterator = NameExpansionIterator(
          self.command_name, self.debug,
          self.logger, self.gsutil_api,
          threaded_wildcards, self.recursion_requested,
          all_versions=self.all_versions,
          continue_on_error=self.continue_on_error or self.parallel_operations,
          bucket_listing_fields=['name'])

      seek_ahead_iterator = SeekAheadNameExpansionIterator(
          self.command_name, self.debug, self.GetSeekAheadGsutilApi(),
          threaded_wildcards, self.recursion_requested,
          all_versions=self.all_versions)

      # We cannot curry policy along due to a Python2.6 bug; see comments in
      # IamCommand._PatchIam for more information.
      policy_it = itertools.repeat(protojson.encode_message(policy))
      self.Apply(
          _SetIamWrapper,
          itertools.izip(
              policy_it, name_expansion_iterator),
          _SetIamExceptionHandler,
          fail_on_error=not self.continue_on_error,
          seek_ahead_iterator=seek_ahead_iterator)

      self.everything_set_okay &= not GetFailureCount() > 0

    # TODO: Add an error counter for files and objects.
    if not self.everything_set_okay:
      raise CommandException('Some IAM policies could not be set.')
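
_SetIam only trusts the policy file after it parses as JSON, and it falls back to the file's own etag unless -e forced one. A stripped-down sketch of just that loading-and-validation step (LoadPolicyFile is an illustrative name):

import json


def LoadPolicyFile(path):
  """Returns (bindings, etag) parsed from a local IAM policy JSON file."""
  try:
    with open(path, 'r') as fp:
      policy = json.load(fp)
  except IOError:
    raise ValueError('Policy file "%s" could not be read.' % path)
  except ValueError:
    raise ValueError('Policy file "%s" is not valid JSON.' % path)
  return policy.get('bindings', []), policy.get('etag', '')
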
Example #24
    def RunCommand(self):
        """Command entry point for the mb command."""
        bucket_policy_only = None
        location = None
        storage_class = None
        seconds = None
        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-l':
                    location = a
                elif o == '-p':
                    # Project IDs are sent as header values when using gs and s3 XML APIs.
                    InsistAscii(
                        a, 'Invalid non-ASCII character found in project ID')
                    self.project_id = a
                elif o == '-c' or o == '-s':
                    storage_class = NormalizeStorageClass(a)
                elif o == '--retention':
                    seconds = RetentionInSeconds(a)
                elif o == '-b':
                    if self.gsutil_api.GetApiSelector(
                            'gs') != ApiSelector.JSON:
                        raise CommandException(
                            'The -b <on|off> option '
                            'can only be used with the JSON API')
                    InsistOnOrOff(
                        a, 'Only on and off values allowed for -b option')
                    bucket_policy_only = (a == 'on')

        bucket_metadata = apitools_messages.Bucket(location=location,
                                                   storageClass=storage_class)
        if bucket_policy_only:
            bucket_metadata.iamConfiguration = IamConfigurationValue()
            iam_config = bucket_metadata.iamConfiguration
            iam_config.bucketPolicyOnly = BucketPolicyOnlyValue()
            iam_config.bucketPolicyOnly.enabled = bucket_policy_only

        for bucket_url_str in self.args:
            bucket_url = StorageUrlFromString(bucket_url_str)
            if seconds is not None:
                if bucket_url.scheme != 'gs':
                    raise CommandException(
                        'Retention policy can only be specified for '
                        'GCS buckets.')
                retention_policy = (
                    apitools_messages.Bucket.RetentionPolicyValue(
                        retentionPeriod=seconds))
                bucket_metadata.retentionPolicy = retention_policy

            if not bucket_url.IsBucket():
                raise CommandException(
                    'The mb command requires a URL that specifies a '
                    'bucket.\n"%s" is not valid.' % bucket_url)
            if (not BUCKET_NAME_RE.match(bucket_url.bucket_name)
                    or TOO_LONG_DNS_NAME_COMP.search(bucket_url.bucket_name)):
                raise InvalidUrlError('Invalid bucket name in URL "%s"' %
                                      bucket_url.bucket_name)

            self.logger.info('Creating %s...', bucket_url)
            # Pass storage_class param only if this is a GCS bucket. (In S3 the
            # storage class is specified on the key object.)
            try:
                self.gsutil_api.CreateBucket(bucket_url.bucket_name,
                                             project_id=self.project_id,
                                             metadata=bucket_metadata,
                                             provider=bucket_url.scheme)
            except BadRequestException as e:
                if (e.status == 400
                        and e.reason == 'DotfulBucketNameNotUnderTld'
                        and bucket_url.scheme == 'gs'):
                    bucket_name = bucket_url.bucket_name
                    final_comp = bucket_name[bucket_name.rfind('.') + 1:]
                    raise CommandException('\n'.join(
                        textwrap.wrap(
                            'Buckets with "." in the name must be valid DNS names. The bucket'
                            ' you are attempting to create (%s) is not a valid DNS name,'
                            ' because the final component (%s) is not currently a valid part'
                            ' of the top-level DNS tree.' %
                            (bucket_name, final_comp))))
                else:
                    raise

        return 0
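
Bucket names are checked against BUCKET_NAME_RE and TOO_LONG_DNS_NAME_COMP before CreateBucket is called. An illustrative approximation of those checks; the real regexes live in gslib and cover more cases:

import re

# Rough approximations of gsutil's BUCKET_NAME_RE and TOO_LONG_DNS_NAME_COMP.
_BUCKET_NAME_RE = re.compile(r'^[a-z0-9][a-z0-9._-]{1,220}[a-z0-9]$')
_TOO_LONG_COMPONENT_RE = re.compile(r'[-_a-z0-9]{64,}')


def LooksLikeValidBucketName(name):
  """Returns True if name passes a rough GCS bucket-name sanity check."""
  return (bool(_BUCKET_NAME_RE.match(name)) and
          not _TOO_LONG_COMPONENT_RE.search(name))
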
Example #25
  def _PatchIam(self):
    self.continue_on_error = False
    self.recursion_requested = False

    patch_bindings_tuples = []

    if self.sub_opts:
      for o, a in self.sub_opts:
        if o in ['-r', '-R']:
          self.recursion_requested = True
        elif o == '-f':
          self.continue_on_error = True
        elif o == '-d':
          patch_bindings_tuples.append(BindingStringToTuple(False, a))

    patterns = []

    # N.B.: self.sub_opts stops taking in options at the first non-flagged
    # token. The rest of the tokens are sent to self.args. Thus, in order to
    # handle input of the form "-d <binding> <binding> <url>", we will have to
    # parse self.args for a mix of both bindings and CloudUrls. We are not
    # expecting to come across the -r, -f flags here.
    it = iter(self.args)
    for token in it:
      if STORAGE_URI_REGEX.match(token):
        patterns.append(token)
        break
      if token == '-d':
        patch_bindings_tuples.append(BindingStringToTuple(False, next(it)))
      else:
        patch_bindings_tuples.append(BindingStringToTuple(True, token))
    if not patch_bindings_tuples:
      raise CommandException('Must specify at least one binding.')

    # All following arguments are urls.
    for token in it:
      patterns.append(token)

    self.everything_set_okay = True
    self.tried_ch_on_resource_with_conditions = False
    threaded_wildcards = []
    for pattern in patterns:
      surl = StorageUrlFromString(pattern)
      try:
        if surl.IsBucket():
          if self.recursion_requested:
            surl.object_name = '*'
            threaded_wildcards.append(surl.url_string)
          else:
            self.PatchIamHelper(surl, patch_bindings_tuples)
        else:
          threaded_wildcards.append(surl.url_string)
      except AttributeError:
        error_msg = 'Invalid Cloud URL "%s".' % surl.object_name
        if set(surl.object_name).issubset(set('-Rrf')):
          error_msg += (
              ' This resource handle looks like a flag, which must appear '
              'before all bindings. See "gsutil help iam ch" for more details.')
        raise CommandException(error_msg)

    if threaded_wildcards:
      name_expansion_iterator = NameExpansionIterator(
          self.command_name,
          self.debug,
          self.logger,
          self.gsutil_api,
          threaded_wildcards,
          self.recursion_requested,
          all_versions=self.all_versions,
          continue_on_error=self.continue_on_error or self.parallel_operations,
          bucket_listing_fields=['name'])

      seek_ahead_iterator = SeekAheadNameExpansionIterator(
          self.command_name,
          self.debug,
          self.GetSeekAheadGsutilApi(),
          threaded_wildcards,
          self.recursion_requested,
          all_versions=self.all_versions)

      serialized_bindings_tuples_it = itertools.repeat(
          [SerializeBindingsTuple(t) for t in patch_bindings_tuples])
      self.Apply(_PatchIamWrapper,
                 zip(serialized_bindings_tuples_it, name_expansion_iterator),
                 _PatchIamExceptionHandler,
                 fail_on_error=not self.continue_on_error,
                 seek_ahead_iterator=seek_ahead_iterator)

      self.everything_set_okay &= not GetFailureCount() > 0

    # TODO: Add an error counter for files and objects.
    if not self.everything_set_okay:
      msg = 'Some IAM policies could not be patched.'
      if self.tried_ch_on_resource_with_conditions:
        msg += '\n'
        msg += '\n'.join(
            textwrap.wrap(
                'Some resources had conditions present in their IAM policy '
                'bindings, which is not supported by "iam ch". %s' %
                (IAM_CH_CONDITIONS_WORKAROUND_MSG)))
      raise CommandException(msg)
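
Because sub_opts stops consuming flags at the first positional token, _PatchIam re-scans self.args itself, treating everything before the first storage URL as a binding. A condensed sketch of that split, using a simplified scheme check rather than gsutil's STORAGE_URI_REGEX and ignoring the interleaved -d handling:

import re

_SIMPLE_URL_RE = re.compile(r'^[a-z][a-z0-9+.-]*://')  # e.g. gs:// or s3://


def SplitBindingsAndUrls(args):
  """Returns (binding_tokens, url_tokens) from a mixed iam ch argument list."""
  bindings, urls = [], []
  it = iter(args)
  for token in it:
    if _SIMPLE_URL_RE.match(token):
      urls.append(token)
      break
    bindings.append(token)
  urls.extend(it)  # Everything after the first URL is treated as a URL.
  return bindings, urls
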
Example #26
def PrintFullInfoAboutObject(bucket_listing_ref, incl_acl=True):
  """Print full info for given object (like what displays for gsutil ls -L).

  Args:
    bucket_listing_ref: BucketListingRef being listed.
                        Must have ref_type OBJECT and a populated root_object
                        with the desired fields.
    incl_acl: True if ACL info should be output.

  Returns:
    Tuple (number of objects, object_length)

  Raises:
    Exception: if calling bug encountered.
  """
  url_str = bucket_listing_ref.url_string
  storage_url = StorageUrlFromString(url_str)
  obj = bucket_listing_ref.root_object

  if (obj.metadata and S3_DELETE_MARKER_GUID in
      obj.metadata.additionalProperties):
    num_bytes = 0
    num_objs = 0
    url_str += '<DeleteMarker>'
  else:
    num_bytes = obj.size
    num_objs = 1

  print '%s:' % url_str.encode(UTF8)
  if obj.updated:
    print '\tCreation time:\t\t%s' % obj.updated.strftime(
        '%a, %d %b %Y %H:%M:%S GMT')
  if obj.cacheControl:
    print '\tCache-Control:\t\t%s' % obj.cacheControl
  if obj.contentDisposition:
    print '\tContent-Disposition:\t\t%s' % obj.contentDisposition
  if obj.contentEncoding:
    print '\tContent-Encoding:\t\t%s' % obj.contentEncoding
  if obj.contentLanguage:
    print '\tContent-Language:\t%s' % obj.contentLanguage
  print '\tContent-Length:\t\t%s' % obj.size
  print '\tContent-Type:\t\t%s' % obj.contentType
  if obj.componentCount:
    print '\tComponent-Count:\t%d' % obj.componentCount
  marker_props = {}
  if obj.metadata and obj.metadata.additionalProperties:
    non_marker_props = []
    for add_prop in obj.metadata.additionalProperties:
      if add_prop.key not in S3_MARKER_GUIDS:
        non_marker_props.append(add_prop)
      else:
        marker_props[add_prop.key] = add_prop.value
    if non_marker_props:
      print '\tMetadata:'
      for ap in non_marker_props:
        meta_string = '\t\t%s:\t\t%s' % (ap.key, ap.value)
        print meta_string.encode(UTF8)
  if obj.crc32c: print '\tHash (crc32c):\t\t%s' % obj.crc32c
  if obj.md5Hash: print '\tHash (md5):\t\t%s' % obj.md5Hash
  print '\tETag:\t\t\t%s' % obj.etag.strip('"\'')
  if obj.generation:
    generation_str = GenerationFromUrlAndString(storage_url, obj.generation)
    print '\tGeneration:\t\t%s' % generation_str
  if obj.metageneration:
    print '\tMetageneration:\t\t%s' % obj.metageneration
  if incl_acl:
    # JSON API won't return acls as part of the response unless we have
    # full control scope
    if obj.acl:
      print '\tACL:\t\t%s' % AclTranslation.JsonFromMessage(obj.acl)
    elif S3_ACL_MARKER_GUID in marker_props:
      print '\tACL:\t\t%s' % marker_props[S3_ACL_MARKER_GUID]
    else:
      print ('\tACL:\t\t\tACCESS DENIED. Note: you need OWNER '
             'permission\n\t\t\t\ton the object to read its ACL.')

  return (num_objs, num_bytes)
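
PrintFullInfoAboutObject splits metadata.additionalProperties into user-visible metadata and internal S3 marker entries before printing. Here is a small sketch of just that filtering step, using plain (key, value) pairs and made-up GUID strings in place of the apitools message and gsutil's S3_MARKER_GUIDS.

# Assumption: placeholder GUID strings; the real values live in gsutil's
# S3_MARKER_GUIDS constant.
S3_MARKER_GUIDS = frozenset(['s3-acl-marker-guid', 's3-delete-marker-guid'])


def split_marker_props(additional_properties):
  """Separates user metadata pairs from internal S3 marker properties."""
  non_marker_props = []
  marker_props = {}
  for key, value in additional_properties:
    if key in S3_MARKER_GUIDS:
      marker_props[key] = value  # Surfaced later, e.g. for the ACL line.
    else:
      non_marker_props.append((key, value))  # Printed under "Metadata:".
  return non_marker_props, marker_props


# ('color', 'blue') is user metadata; the ACL marker is routed to marker_props.
user_meta, markers = split_marker_props(
    [('color', 'blue'), ('s3-acl-marker-guid', '{"owner": "jane"}')])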
Example #27
  def _SetIam(self):
    """Set IAM policy for given wildcards on the command line."""

    self.continue_on_error = False
    self.recursion_requested = False
    self.all_versions = False
    if self.sub_opts:
      for o, unused_a in self.sub_opts:
        if o in ['-r', '-R']:
          self.recursion_requested = True
        elif o == '-f':
          self.continue_on_error = True
        elif o == '-a':
          self.all_versions = True
        else:
          self.RaiseInvalidArgumentException()

    file_url = self.args[0]
    patterns = self.args[1:]

    # Load the IAM policy file and raise error if the file is invalid JSON or
    # does not exist.
    try:
      with open(file_url, 'r') as fp:
        bindings = json.loads(fp.read())
    except (IOError, ValueError):
      raise ArgumentException('Invalid IAM policy file "%s".' % file_url)

    policy = apitools_messages.Policy(bindings=bindings)

    self.everything_set_okay = True

    # This list of wildcard strings will be handled by NameExpansionIterator.
    threaded_wildcards = []

    for pattern in patterns:
      surl = StorageUrlFromString(pattern)
      if surl.IsBucket():
        if self.recursion_requested:
          surl.object_name = '*'
          threaded_wildcards.append(surl.url_string)
        else:
          self.SetIamHelper(surl, policy)
      else:
        threaded_wildcards.append(surl.url_string)

    # N.B.: If threaded_wildcards contains a non-existent bucket
    # (e.g. ["gs://non-existent", "gs://existent"]), NameExpansionIterator
    # will raise an exception in iter.next. This halts all iteration, even
    # when -f is set. This behavior is also evident in acl set. This behavior
    # also appears for any exception that will be raised when iterating over
    # wildcard expansions (access denied if bucket cannot be listed, etc.).
    if threaded_wildcards:
      name_expansion_iterator = NameExpansionIterator(
          self.command_name, self.debug,
          self.logger, self.gsutil_api,
          threaded_wildcards, self.recursion_requested,
          all_versions=self.all_versions,
          continue_on_error=self.continue_on_error or self.parallel_operations,
          bucket_listing_fields=['name'])

      seek_ahead_iterator = SeekAheadNameExpansionIterator(
          self.command_name, self.debug, self.GetSeekAheadGsutilApi(),
          threaded_wildcards, self.recursion_requested,
          all_versions=self.all_versions)

      # We cannot curry policy along due to a Python2.6 bug; see comments in
      # IamCommand._PatchIam for more information.
      policy_it = itertools.repeat(protojson.encode_message(policy))
      self.Apply(
          _SetIamWrapper,
          itertools.izip(
              policy_it, name_expansion_iterator),
          _SetIamExceptionHandler,
          fail_on_error=not self.continue_on_error,
          seek_ahead_iterator=seek_ahead_iterator)

      self.everything_set_okay &= not GetFailureCount() > 0

    # TODO: Add an error counter for files and objects.
    if not self.everything_set_okay:
      raise CommandException('Some IAM policies could not be set.')
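
Since the same policy is applied to every expanded name, _SetIam pairs a repeated, serialized policy with the name-expansion iterator instead of currying the policy into the worker. Here is a self-contained sketch of that repeat-and-zip pattern with a placeholder worker and data, not gsutil's Apply machinery.

import itertools


def apply_to_all(worker, shared_arg, items):
  """Calls worker(shared_arg, item) for each item by zipping a repeated arg."""
  shared_it = itertools.repeat(shared_arg)
  # zip stops once items is exhausted; itertools.izip keeps the pairing lazy
  # on Python 2.
  for shared, item in zip(shared_it, items):
    worker(shared, item)


# Each object name is paired with the same serialized policy string.
results = []
apply_to_all(lambda policy, name: results.append((name, policy)),
             '{"bindings": []}',
             ['gs://bucket/a', 'gs://bucket/b'])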
Example #28
    def RunCommand(self):
        """Command entry point for the rm command."""
        # self.recursion_requested is initialized in command.py (so it can be
        # checked in parent class for all commands).
        self.continue_on_error = False
        self.all_versions = False
        if self.sub_opts:
            for o, unused_a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True
                    self.all_versions = True

        bucket_urls_to_delete = []
        bucket_strings_to_delete = []
        if self.recursion_requested:
            bucket_fields = ['id']
            for url_str in self.args:
                url = StorageUrlFromString(url_str)
                if url.IsBucket() or url.IsProvider():
                    for blr in self.WildcardIterator(url_str).IterBuckets(
                            bucket_fields=bucket_fields):
                        bucket_urls_to_delete.append(blr.storage_url)
                        bucket_strings_to_delete.append(url_str)

        self.preconditions = PreconditionsFromHeaders(self.headers or {})

        # Used to track if any files failed to be removed.
        self.everything_removed_okay = True

        try:
            # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                self.args,
                self.recursion_requested,
                project_id=self.project_id,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations)

            # Perform remove requests in parallel (-m) mode, if requested, using
            # configured number of parallel processes and threads. Otherwise,
            # perform requests with sequential function calls in current process.
            self.Apply(_RemoveFuncWrapper,
                       name_expansion_iterator,
                       _RemoveExceptionHandler,
                       fail_on_error=(not self.continue_on_error))

        # Assuming the bucket has versioning enabled, URLs that don't map to
        # objects should throw an error even with all_versions, since the prior
        # round of deletes only sends objects to a history table.
        # This assumption that rm -a is only called for versioned buckets should be
        # corrected, but the fix is non-trivial.
        except CommandException as e:
            # Don't raise if there are buckets to delete -- it's valid to say:
            #   gsutil rm -r gs://some_bucket
            # if the bucket is empty.
            if not bucket_urls_to_delete and not self.continue_on_error:
                raise
            # Reset the failure count if we failed due to an empty bucket that we're
            # going to delete.
            msg = 'No URLs matched: '
            if msg in str(e):
                parts = str(e).split(msg)
                if len(parts) == 2 and parts[1] in bucket_strings_to_delete:
                    ResetFailureCount()
        except ServiceException as e:
            if not self.continue_on_error:
                raise