Example #1
    def _GetIam(self, thread_state=None):
        """Gets IAM policy for single bucket or object."""

        pattern = self.args[0]

        matches = PluralityCheckableIterator(
            self.WildcardIterator(pattern).IterAll(
                bucket_listing_fields=['name']))
        if matches.IsEmpty():
            raise CommandException('%s matched no URLs' % pattern)
        if matches.HasPlurality():
            raise CommandException(
                '%s matched more than one URL, which is not allowed by the %s '
                'command' % (pattern, self.command_name))

        storage_url = StorageUrlFromString(list(matches)[0].url_string)
        policy = self.GetIamHelper(storage_url, thread_state=thread_state)
        policy_json = json.loads(protojson.encode_message(policy))
        policy_str = json.dumps(
            policy_json,
            sort_keys=True,
            indent=2,
        )
        print(policy_str)
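
The guard above rejects patterns that match zero or multiple URLs. A minimal stand-in for that check, using itertools instead of gsutil's PluralityCheckableIterator (the helper name here is my own):

import itertools

def _require_single_match(iterator, pattern):
    # Peek at most two results to distinguish "none" from "more than one".
    first_two = list(itertools.islice(iterator, 2))
    if not first_two:
        raise ValueError('%s matched no URLs' % pattern)
    if len(first_two) > 1:
        raise ValueError('%s matched more than one URL' % pattern)
    return first_two[0]
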
Example #2
  def RunCommand(self):
    """Command entry point for the hash command."""
    (calc_crc32c, calc_md5, format_func, cloud_format_func,
     output_format) = (self._ParseOpts(self.sub_opts, self.logger))

    matched_one = False
    for url_str in self.args:
      for file_ref in self.WildcardIterator(url_str).IterObjects(
          bucket_listing_fields=[
              'crc32c',
              'customerEncryption',
              'md5Hash',
              'size',
          ]):
        matched_one = True
        url = StorageUrlFromString(url_str)
        file_name = file_ref.storage_url.object_name
        if StorageUrlFromString(url_str).IsFileUrl():
          file_size = os.path.getsize(file_name)
          self.gsutil_api.status_queue.put(
              FileMessage(url,
                          None,
                          time.time(),
                          size=file_size,
                          finished=False,
                          message_type=FileMessage.FILE_HASH))
          callback_processor = ProgressCallbackWithTimeout(
              file_size,
              FileProgressCallbackHandler(self.gsutil_api.status_queue,
                                          src_url=StorageUrlFromString(url_str),
                                          operation_name='Hashing').call)
          hash_dict = self._GetHashClassesFromArgs(calc_crc32c, calc_md5)
          with open(file_name, 'rb') as fp:
            hashing_helper.CalculateHashesFromContents(
                fp, hash_dict, callback_processor=callback_processor)
          self.gsutil_api.status_queue.put(
              FileMessage(url,
                          None,
                          time.time(),
                          size=file_size,
                          finished=True,
                          message_type=FileMessage.FILE_HASH))
        else:
          hash_dict = {}
          obj_metadata = file_ref.root_object
          file_size = obj_metadata.size
          md5_present = obj_metadata.md5Hash is not None
          crc32c_present = obj_metadata.crc32c is not None
          if not md5_present and not crc32c_present:
            logging.getLogger().warn('No hashes present for %s', url_str)
            continue
          if md5_present:
            hash_dict['md5'] = obj_metadata.md5Hash
          if crc32c_present:
            hash_dict['crc32c'] = obj_metadata.crc32c
        print('Hashes [%s] for %s:' % (output_format, file_name))
        for name, digest in six.iteritems(hash_dict):
          print('\tHash (%s):\t\t%s' % (name,
                                        (format_func(digest) if url.IsFileUrl()
                                         else cloud_format_func(digest))))

    if not matched_one:
      raise CommandException('No files matched')
    _PutToQueueWithTimeout(self.gsutil_api.status_queue,
                           FinalMessage(time.time()))
    return 0
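
GCS object metadata reports md5Hash as a base64-encoded digest, while local hashing typically prints hex, which is presumably why the command carries separate format_func and cloud_format_func converters. A small sketch of such a conversion (my own helper, not gsutil's):

import base64
import binascii

def _b64_md5_to_hex(b64_md5):
    # e.g. '1B2M2Y8AsgTpgAmY7PhCfg==' -> 'd41d8cd98f00b204e9800998ecf8427e' (MD5 of b'')
    return binascii.hexlify(base64.b64decode(b64_md5)).decode('ascii')
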
Example #3
class TestHashingFileUploadWrapper(testcase.GsUtilUnitTestCase):
    """Unit tests for the HashingFileUploadWrapper class."""

    _temp_test_file = None
    _dummy_url = StorageUrlFromString('gs://bucket/object')

    def _GetTestFile(self):
        contents = pkgutil.get_data('gslib', 'tests/test_data/%s' % _TEST_FILE)
        if not self._temp_test_file:
            self._temp_test_file = self.CreateTempFile(file_name=_TEST_FILE,
                                                       contents=contents)
        return self._temp_test_file

    def testReadToEOF(self):
        digesters = {'md5': GetMd5()}
        tmp_file = self.CreateTempFile(contents=b'a' * TRANSFER_BUFFER_SIZE *
                                       4)
        with open(tmp_file, 'rb') as stream:
            wrapper = HashingFileUploadWrapper(stream, digesters,
                                               {'md5': GetMd5},
                                               self._dummy_url, self.logger)
            wrapper.read()
        with open(tmp_file, 'rb') as stream:
            actual = CalculateMd5FromContents(stream)
        self.assertEqual(actual, digesters['md5'].hexdigest())

    def _testSeekBack(self, initial_position, seek_back_amount):
        """Tests reading then seeking backwards.

    This function simulates an upload that is resumed after a connection break.
    It reads one transfer buffer at a time until it reaches initial_position,
    then seeks backwards (as if the server did not receive some of the bytes)
    and reads to the end of the file, ensuring the hash matches the original
    file upon completion.

    Args:
      initial_position: Initial number of bytes to read before seek.
      seek_back_amount: Number of bytes to seek backward.

    Raises:
      AssertionError on wrong amount of data remaining or hash mismatch.
    """
        tmp_file = self._GetTestFile()
        tmp_file_len = os.path.getsize(tmp_file)

        self.assertGreaterEqual(
            initial_position, seek_back_amount,
            'seek_back_amount must be less than initial position %s '
            '(but was actually: %s)' % (initial_position, seek_back_amount))
        self.assertLess(
            initial_position, tmp_file_len,
            'initial_position must be less than test file size %s '
            '(but was actually: %s)' % (tmp_file_len, initial_position))

        digesters = {'md5': GetMd5()}
        with open(tmp_file, 'rb') as stream:
            wrapper = HashingFileUploadWrapper(stream, digesters,
                                               {'md5': GetMd5},
                                               self._dummy_url, self.logger)
            position = 0
            while position < initial_position - TRANSFER_BUFFER_SIZE:
                data = wrapper.read(TRANSFER_BUFFER_SIZE)
                position += len(data)
            wrapper.read(initial_position - position)
            wrapper.seek(initial_position - seek_back_amount)
            self.assertEqual(wrapper.tell(),
                             initial_position - seek_back_amount)
            data = wrapper.read()
            self.assertEqual(
                len(data),
                tmp_file_len - (initial_position - seek_back_amount))
        with open(tmp_file, 'rb') as stream:
            actual = CalculateMd5FromContents(stream)
        self.assertEqual(actual, digesters['md5'].hexdigest())

    def testSeekToBeginning(self):
        for num_bytes in (TRANSFER_BUFFER_SIZE - 1, TRANSFER_BUFFER_SIZE,
                          TRANSFER_BUFFER_SIZE + 1, TRANSFER_BUFFER_SIZE * 2 -
                          1, TRANSFER_BUFFER_SIZE * 2,
                          TRANSFER_BUFFER_SIZE * 2 + 1,
                          TRANSFER_BUFFER_SIZE * 3 - 1, TRANSFER_BUFFER_SIZE *
                          3, TRANSFER_BUFFER_SIZE * 3 + 1):
            self._testSeekBack(num_bytes, num_bytes)

    def testSeekBackAroundOneBuffer(self):
        for initial_position in (TRANSFER_BUFFER_SIZE + 1,
                                 TRANSFER_BUFFER_SIZE * 2 - 1,
                                 TRANSFER_BUFFER_SIZE * 2,
                                 TRANSFER_BUFFER_SIZE * 2 + 1,
                                 TRANSFER_BUFFER_SIZE * 3 - 1,
                                 TRANSFER_BUFFER_SIZE * 3,
                                 TRANSFER_BUFFER_SIZE * 3 + 1):
            for seek_back_amount in (TRANSFER_BUFFER_SIZE - 1,
                                     TRANSFER_BUFFER_SIZE,
                                     TRANSFER_BUFFER_SIZE + 1):
                self._testSeekBack(initial_position, seek_back_amount)

    def testSeekBackMoreThanOneBuffer(self):
        for initial_position in (TRANSFER_BUFFER_SIZE * 2 + 1,
                                 TRANSFER_BUFFER_SIZE * 3 - 1,
                                 TRANSFER_BUFFER_SIZE * 3,
                                 TRANSFER_BUFFER_SIZE * 3 + 1):
            for seek_back_amount in (TRANSFER_BUFFER_SIZE * 2 - 1,
                                     TRANSFER_BUFFER_SIZE * 2,
                                     TRANSFER_BUFFER_SIZE * 2 + 1):
                self._testSeekBack(initial_position, seek_back_amount)

    def _testSeekForward(self, initial_seek):
        """Tests seeking to an initial position and then reading.

    This function simulates an upload that is resumed after a process break.
    It seeks from zero to the initial position (as if the server already had
    those bytes). Then it reads to the end of the file, ensuring the hash
    matches the original file upon completion.

    Args:
      initial_seek: Number of bytes to initially seek.

    Raises:
      AssertionError on wrong amount of data remaining or hash mismatch.
    """
        tmp_file = self._GetTestFile()
        tmp_file_len = os.path.getsize(tmp_file)

        self.assertLess(
            initial_seek, tmp_file_len,
            'initial_seek must be less than test file size %s '
            '(but was actually: %s)' % (tmp_file_len, initial_seek))

        digesters = {'md5': GetMd5()}
        with open(tmp_file, 'rb') as stream:
            wrapper = HashingFileUploadWrapper(stream, digesters,
                                               {'md5': GetMd5},
                                               self._dummy_url, self.logger)
            wrapper.seek(initial_seek)
            self.assertEqual(wrapper.tell(), initial_seek)
            data = wrapper.read()
            self.assertEqual(len(data), tmp_file_len - initial_seek)
        with open(tmp_file, 'rb') as stream:
            actual = CalculateMd5FromContents(stream)
        self.assertEqual(actual, digesters['md5'].hexdigest())

    def testSeekForward(self):
        for initial_seek in (0, TRANSFER_BUFFER_SIZE - 1, TRANSFER_BUFFER_SIZE,
                             TRANSFER_BUFFER_SIZE + 1,
                             TRANSFER_BUFFER_SIZE * 2 - 1,
                             TRANSFER_BUFFER_SIZE * 2,
                             TRANSFER_BUFFER_SIZE * 2 + 1):
            self._testSeekForward(initial_seek)

    def _testSeekAway(self, initial_read):
        """Tests reading to an initial position and then seeking to EOF and back.

    This function simulates a size check on the input file by seeking to the
    end of the file and then back to the current position. Then it reads to
    the end of the file, ensuring the hash matches the original file upon
    completion.

    Args:
      initial_read: Number of bytes to initially read.

    Raises:
      AssertionError on wrong amount of data remaining or hash mismatch.
    """
        tmp_file = self._GetTestFile()
        tmp_file_len = os.path.getsize(tmp_file)

        self.assertLess(
            initial_read, tmp_file_len,
            'initial_read must be less than test file size %s '
            '(but was actually: %s)' % (tmp_file_len, initial_read))

        digesters = {'md5': GetMd5()}
        with open(tmp_file, 'rb') as stream:
            wrapper = HashingFileUploadWrapper(stream, digesters,
                                               {'md5': GetMd5},
                                               self._dummy_url, self.logger)
            wrapper.read(initial_read)
            self.assertEqual(wrapper.tell(), initial_read)
            wrapper.seek(0, os.SEEK_END)
            self.assertEqual(wrapper.tell(), tmp_file_len)
            wrapper.seek(initial_read, os.SEEK_SET)
            data = wrapper.read()
            self.assertEqual(len(data), tmp_file_len - initial_read)
        with open(tmp_file, 'rb') as stream:
            actual = CalculateMd5FromContents(stream)
        self.assertEqual(actual, digesters['md5'].hexdigest())

    def testValidSeekAway(self):
        for initial_read in (0, TRANSFER_BUFFER_SIZE - 1, TRANSFER_BUFFER_SIZE,
                             TRANSFER_BUFFER_SIZE + 1,
                             TRANSFER_BUFFER_SIZE * 2 - 1,
                             TRANSFER_BUFFER_SIZE * 2,
                             TRANSFER_BUFFER_SIZE * 2 + 1):
            self._testSeekAway(initial_read)

    def testInvalidSeekAway(self):
        """Tests seeking to EOF and then reading without first doing a SEEK_SET."""
        tmp_file = self._GetTestFile()
        digesters = {'md5': GetMd5()}
        with open(tmp_file, 'rb') as stream:
            wrapper = HashingFileUploadWrapper(stream, digesters,
                                               {'md5': GetMd5},
                                               self._dummy_url, self.logger)
            wrapper.read(TRANSFER_BUFFER_SIZE)
            wrapper.seek(0, os.SEEK_END)
            try:
                wrapper.read()
                self.fail('Expected CommandException for invalid seek.')
            except CommandException as e:
                self.assertIn(
                    'Read called on hashing file pointer in an unknown position',
                    str(e))
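
Each test verifies the wrapper's digest against a fresh hash of the whole file. A minimal chunked-MD5 helper in the spirit of CalculateMd5FromContents (assumed behavior, not the actual implementation):

import hashlib

def _md5_hex(stream, chunk_size=8192):
    # Read in bounded chunks so large files need not fit in memory.
    digest = hashlib.md5()
    for chunk in iter(lambda: stream.read(chunk_size), b''):
        digest.update(chunk)
    return digest.hexdigest()
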
Example #4
    def RunCommand(self):
        """Command entry point for the rm command."""
        # self.recursion_requested is initialized in command.py (so it can be
        # checked in parent class for all commands).
        self.continue_on_error = self.parallel_operations
        self.read_args_from_stdin = False
        self.all_versions = False
        if self.sub_opts:
            for o, unused_a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-I':
                    self.read_args_from_stdin = True
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True
                    self.all_versions = True

        if self.read_args_from_stdin:
            if self.args:
                raise CommandException(
                    'No arguments allowed with the -I flag.')
            url_strs = StdinIterator()
        else:
            if not self.args:
                raise CommandException(
                    'The rm command (without -I) expects at '
                    'least one URL.')
            url_strs = self.args

        # Tracks number of object deletes that failed.
        self.op_failure_count = 0

        # Tracks if any buckets were missing.
        self.bucket_not_found_count = 0

        # Tracks buckets that are slated for recursive deletion.
        bucket_urls_to_delete = []
        self.bucket_strings_to_delete = []

        if self.recursion_requested:
            bucket_fields = ['id']
            for url_str in url_strs:
                url = StorageUrlFromString(url_str)
                if url.IsBucket() or url.IsProvider():
                    for blr in self.WildcardIterator(url_str).IterBuckets(
                            bucket_fields=bucket_fields):
                        bucket_urls_to_delete.append(blr.storage_url)
                        self.bucket_strings_to_delete.append(url_str)

        self.preconditions = PreconditionsFromHeaders(self.headers or {})

        try:
            # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                url_strs,
                self.recursion_requested,
                project_id=self.project_id,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations)

            seek_ahead_iterator = None
            # Cannot seek ahead with stdin args, since we can only iterate them
            # once without buffering in memory.
            if not self.read_args_from_stdin:
                seek_ahead_iterator = SeekAheadNameExpansionIterator(
                    self.command_name,
                    self.debug,
                    self.GetSeekAheadGsutilApi(),
                    url_strs,
                    self.recursion_requested,
                    all_versions=self.all_versions,
                    project_id=self.project_id)

            # Perform remove requests in parallel (-m) mode, if requested, using
            # configured number of parallel processes and threads. Otherwise,
            # perform requests with sequential function calls in current process.
            self.Apply(
                _RemoveFuncWrapper,
                name_expansion_iterator,
                _RemoveExceptionHandler,
                fail_on_error=(not self.continue_on_error),
                shared_attrs=['op_failure_count', 'bucket_not_found_count'],
                seek_ahead_iterator=seek_ahead_iterator)

        # Assuming the bucket has versioning enabled, URLs that don't map to
        # objects should throw an error even with all_versions, since the prior
        # round of deletes only sends objects to a history table.
        # This assumption that rm -a is only called for versioned buckets should be
        # corrected, but the fix is non-trivial.
        except CommandException as e:
            # Don't raise if there are buckets to delete -- it's valid to say:
            #   gsutil rm -r gs://some_bucket
            # if the bucket is empty.
            if _ExceptionMatchesBucketToDelete(self.bucket_strings_to_delete,
                                               e):
                DecrementFailureCount()
            else:
                raise
        except ServiceException as e:
            if not self.continue_on_error:
                raise
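
With -I the command reads one URL per line from stdin. A minimal generator with that assumed behavior (StdinIterator itself is not shown in this snippet):

import sys

def _urls_from_stdin():
    # Yield one URL per non-empty line; surrounding whitespace is stripped.
    for line in sys.stdin:
        line = line.strip()
        if line:
            yield line
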
Example #5
    def __iter__(self,
                 bucket_listing_fields=None,
                 expand_top_level_buckets=False):
        """Iterator that gets called when iterating over the cloud wildcard.

    In the case where no wildcard is present, returns a single matching object,
    single matching prefix, or one of each if both exist.

    Args:
      bucket_listing_fields: Iterable fields to include in bucket listings.
                             Ex. ['name', 'acl'].  Iterator is
                             responsible for converting these to list-style
                             format ['items/name', 'items/acl'] as well as
                             adding any fields necessary for listing such as
                             prefixes.  API implementation is responsible for
                             adding pagination fields.  If this is None,
                             all fields are returned.
      expand_top_level_buckets: If true, yield no BUCKET references.  Instead,
                                expand buckets into top-level objects and
                                prefixes.

    Yields:
      BucketListingRef of type BUCKET, OBJECT or PREFIX.
    """
        single_version_request = self.wildcard_url.HasGeneration()

        # For wildcard expansion purposes, we need at a minimum the name of
        # each object and prefix.  If we're not using the default of requesting
        # all fields, make sure at least these are requested.  The Cloud API
        # tolerates specifying the same field twice.
        get_fields = None
        if bucket_listing_fields:
            get_fields = set()
            for field in bucket_listing_fields:
                get_fields.add(field)
            bucket_listing_fields = self._GetToListFields(
                get_fields=bucket_listing_fields)
            bucket_listing_fields.update(['items/name', 'prefixes'])
            get_fields.update(['name'])
            # If we're making versioned requests, ensure generation and
            # metageneration are also included.
            if single_version_request or self.all_versions:
                bucket_listing_fields.update(
                    ['items/generation', 'items/metageneration'])
                get_fields.update(['generation', 'metageneration'])

        # Handle bucket wildcarding, if any, in _ExpandBucketWildcards. Then
        # iterate over the expanded bucket strings and handle any object
        # wildcarding.
        for bucket_listing_ref in self._ExpandBucketWildcards(
                bucket_fields=['id']):
            bucket_url_string = bucket_listing_ref.url_string
            if self.wildcard_url.IsBucket():
                # IsBucket() guarantees there are no prefix or object wildcards, and
                # thus this is a top-level listing of buckets.
                if expand_top_level_buckets:
                    url = StorageUrlFromString(bucket_url_string)
                    for obj_or_prefix in self.gsutil_api.ListObjects(
                            url.bucket_name,
                            delimiter='/',
                            all_versions=self.all_versions,
                            provider=self.wildcard_url.scheme,
                            fields=bucket_listing_fields):
                        if obj_or_prefix.datatype == CloudApi.CsObjectOrPrefixType.OBJECT:
                            yield self._GetObjectRef(
                                bucket_url_string,
                                obj_or_prefix.data,
                                with_version=self.all_versions)
                        else:  # CloudApi.CsObjectOrPrefixType.PREFIX:
                            yield self._GetPrefixRef(bucket_url_string,
                                                     obj_or_prefix.data)
                else:
                    yield bucket_listing_ref
            else:
                # By default, assume a non-wildcarded URL is an object, not a prefix.
                # This prevents unnecessary listings (which are slower, more expensive,
                # and also subject to eventual consistency).
                if (not ContainsWildcard(self.wildcard_url.url_string)
                        and self.wildcard_url.IsObject()
                        and not self.all_versions):
                    try:
                        get_object = self.gsutil_api.GetObjectMetadata(
                            self.wildcard_url.bucket_name,
                            self.wildcard_url.object_name,
                            generation=self.wildcard_url.generation,
                            provider=self.wildcard_url.scheme,
                            fields=get_fields)
                        yield self._GetObjectRef(
                            self.wildcard_url.bucket_url_string,
                            get_object,
                            with_version=(self.all_versions
                                          or single_version_request))
                        return
                    except (NotFoundException, AccessDeniedException):
                        # It's possible this is a prefix - try to list instead.
                        pass

                # Expand iteratively by building prefix/delimiter bucket listing
                # request, filtering the results per the current level's wildcard
                # (if present), and continuing with the next component of the
                # wildcard. See _BuildBucketFilterStrings() documentation for details.
                if single_version_request:
                    url_string = '%s%s#%s' % (bucket_url_string,
                                              self.wildcard_url.object_name,
                                              self.wildcard_url.generation)
                else:
                    # Rstrip any prefixes to correspond with rstripped prefix wildcard
                    # from _BuildBucketFilterStrings().
                    url_string = '%s%s' % (
                        bucket_url_string,
                        StripOneSlash(self.wildcard_url.object_name) or '/'
                    )  # Cover root object named '/' case.
                urls_needing_expansion = [url_string]
                while urls_needing_expansion:
                    url = StorageUrlFromString(urls_needing_expansion.pop(0))
                    (prefix, delimiter, prefix_wildcard,
                     suffix_wildcard) = (self._BuildBucketFilterStrings(
                         url.object_name))
                    prog = re.compile(fnmatch.translate(prefix_wildcard))

                    # If we have a suffix wildcard, we only care about listing prefixes.
                    listing_fields = (set(['prefixes']) if suffix_wildcard else
                                      bucket_listing_fields)

                    # List bucket for objects matching prefix up to delimiter.
                    for obj_or_prefix in self.gsutil_api.ListObjects(
                            url.bucket_name,
                            prefix=prefix,
                            delimiter=delimiter,
                            all_versions=self.all_versions
                            or single_version_request,
                            provider=self.wildcard_url.scheme,
                            fields=listing_fields):
                        if obj_or_prefix.datatype == CloudApi.CsObjectOrPrefixType.OBJECT:
                            gcs_object = obj_or_prefix.data
                            if prog.match(gcs_object.name):
                                if not suffix_wildcard or (StripOneSlash(
                                        gcs_object.name) == suffix_wildcard):
                                    if not single_version_request or (
                                            self._SingleVersionMatches(
                                                gcs_object.generation)):
                                        yield self._GetObjectRef(
                                            bucket_url_string,
                                            gcs_object,
                                            with_version=(
                                                self.all_versions
                                                or single_version_request))
                        else:  # CloudApi.CsObjectOrPrefixType.PREFIX
                            prefix = obj_or_prefix.data

                            if ContainsWildcard(prefix):
                                # TODO: Disambiguate user-supplied strings from iterated
                                # prefix and object names so that we can better reason
                                # about wildcards and handle this case without raising an error.
                                raise CommandException(
                                    'Cloud folder %s%s contains a wildcard; gsutil does '
                                    'not currently support objects with wildcards in their '
                                    'name.' % (bucket_url_string, prefix))

                            # If the prefix ends with a slash, remove it.  Note that we only
                            # remove one slash so that we can successfully enumerate dirs
                            # containing multiple slashes.
                            rstripped_prefix = StripOneSlash(prefix)
                            if prog.match(rstripped_prefix):
                                if suffix_wildcard and rstripped_prefix != suffix_wildcard:
                                    # There's more wildcard left to expand.
                                    url_append_string = '%s%s' % (
                                        bucket_url_string, rstripped_prefix +
                                        '/' + suffix_wildcard)
                                    urls_needing_expansion.append(
                                        url_append_string)
                                else:
                                    # No wildcard to expand, just yield the prefix
                                    yield self._GetPrefixRef(
                                        bucket_url_string, prefix)
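
The per-level filtering above turns the shell-style wildcard into a regular expression with fnmatch.translate. A small standalone illustration of that step:

import fnmatch
import re

prog = re.compile(fnmatch.translate('data/2023-*'))
print(bool(prog.match('data/2023-01-01.csv')))  # True
print(bool(prog.match('logs/2023-01-01.csv')))  # False
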
Example #6
    def _Create(self):
        self.CheckArguments()

        # User-specified options
        pubsub_topic = None
        payload_format = None
        custom_attributes = {}
        event_types = []
        object_name_prefix = None
        should_setup_topic = True

        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-e':
                    event_types.append(a)
                elif o == '-f':
                    payload_format = a
                elif o == '-m':
                    if ':' not in a:
                        raise CommandException(
                            'Custom attributes specified with -m should be of the form '
                            'key:value')
                    key, value = a.split(':')
                    custom_attributes[key] = value
                elif o == '-p':
                    object_name_prefix = a
                elif o == '-s':
                    should_setup_topic = False
                elif o == '-t':
                    pubsub_topic = a

        if payload_format not in PAYLOAD_FORMAT_MAP:
            raise CommandException(
                "Must provide a payload format with -f of either 'json' or 'none'"
            )
        payload_format = PAYLOAD_FORMAT_MAP[payload_format]

        bucket_arg = self.args[-1]

        bucket_url = StorageUrlFromString(bucket_arg)
        if not bucket_url.IsCloudUrl() or not bucket_url.IsBucket():
            raise CommandException(
                "%s %s requires a GCS bucket name, but got '%s'" %
                (self.command_name, self.subcommand_name, bucket_arg))
        if bucket_url.scheme != 'gs':
            raise CommandException(
                'The %s command can only be used with gs:// bucket URLs.' %
                self.command_name)
        bucket_name = bucket_url.bucket_name
        self.logger.debug('Creating notification for bucket %s', bucket_url)

        # Find the project this bucket belongs to
        bucket_metadata = self.gsutil_api.GetBucket(bucket_name,
                                                    fields=['projectNumber'],
                                                    provider=bucket_url.scheme)
        bucket_project_number = bucket_metadata.projectNumber

        # If not specified, choose a sensible default for the Cloud Pub/Sub topic
        # name.
        if not pubsub_topic:
            pubsub_topic = 'projects/%s/topics/%s' % (PopulateProjectId(None),
                                                      bucket_name)
        if not pubsub_topic.startswith('projects/'):
            # If a user picks a topic ID (mytopic) but doesn't pass the whole name (
            # projects/my-project/topics/mytopic ), pick a default project.
            pubsub_topic = 'projects/%s/topics/%s' % (PopulateProjectId(None),
                                                      pubsub_topic)
        self.logger.debug('Using Cloud Pub/Sub topic %s', pubsub_topic)

        just_modified_topic_permissions = False
        if should_setup_topic:
            # Ask GCS for the email address that represents GCS's permission to
            # publish to a Cloud Pub/Sub topic from this project.
            service_account = self.gsutil_api.GetProjectServiceAccount(
                bucket_project_number,
                provider=bucket_url.scheme).email_address
            self.logger.debug('Service account for project %d: %s',
                              bucket_project_number, service_account)
            just_modified_topic_permissions = self._CreateTopic(
                pubsub_topic, service_account)

        for attempt_number in range(0, 2):
            try:
                create_response = self.gsutil_api.CreateNotificationConfig(
                    bucket_name,
                    pubsub_topic=pubsub_topic,
                    payload_format=payload_format,
                    custom_attributes=custom_attributes,
                    event_types=event_types if event_types else None,
                    object_name_prefix=object_name_prefix,
                    provider=bucket_url.scheme)
                break
            except PublishPermissionDeniedException:
                if attempt_number == 0 and just_modified_topic_permissions:
                    # If we have just set the IAM policy, it may take up to 10 seconds to
                    # take effect.
                    self.logger.info(
                        'Retrying create notification in 10 seconds '
                        '(new permissions may take up to 10 seconds to take effect.)'
                    )
                    time.sleep(10)
                else:
                    raise

        notification_name = 'projects/_/buckets/%s/notificationConfigs/%s' % (
            bucket_name, create_response.id)
        self.logger.info('Created notification config %s', notification_name)

        return 0
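
The topic-defaulting logic above can be restated as a small helper (illustrative only; the real code delegates project selection to PopulateProjectId):

def _qualify_topic(pubsub_topic, default_project, bucket_name):
    # No topic given: default to a topic named after the bucket.
    if not pubsub_topic:
        pubsub_topic = bucket_name
    # Bare topic ID: qualify it with a project path.
    if not pubsub_topic.startswith('projects/'):
        pubsub_topic = 'projects/%s/topics/%s' % (default_project, pubsub_topic)
    return pubsub_topic
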
Example #7
    def test_FilterExistingComponentsNonVersioned(self):
        """Tests upload with a variety of component states."""
        mock_api = MockCloudApi()
        bucket_name = self.MakeTempName('bucket')
        tracker_file = self.CreateTempFile(file_name='foo', contents='asdf')
        tracker_file_lock = CreateLock()

        # dst_obj_metadata used for passing content-type.
        empty_object = apitools_messages.Object()

        # Already uploaded, contents still match, component still used.
        fpath_uploaded_correctly = self.CreateTempFile(file_name='foo1',
                                                       contents='1')
        fpath_uploaded_correctly_url = StorageUrlFromString(
            str(fpath_uploaded_correctly))
        object_uploaded_correctly_url = StorageUrlFromString(
            '%s://%s/%s' %
            (self.default_provider, bucket_name, fpath_uploaded_correctly))
        with open(fpath_uploaded_correctly) as f_in:
            fpath_uploaded_correctly_md5 = CalculateB64EncodedMd5FromContents(
                f_in)
        mock_api.MockCreateObjectWithMetadata(apitools_messages.Object(
            bucket=bucket_name,
            name=fpath_uploaded_correctly,
            md5Hash=fpath_uploaded_correctly_md5),
                                              contents='1')

        args_uploaded_correctly = PerformParallelUploadFileToObjectArgs(
            fpath_uploaded_correctly, 0, 1, fpath_uploaded_correctly_url,
            object_uploaded_correctly_url, '', empty_object, tracker_file,
            tracker_file_lock, None)

        # Not yet uploaded, but needed.
        fpath_not_uploaded = self.CreateTempFile(file_name='foo2',
                                                 contents='2')
        fpath_not_uploaded_url = StorageUrlFromString(str(fpath_not_uploaded))
        object_not_uploaded_url = StorageUrlFromString(
            '%s://%s/%s' %
            (self.default_provider, bucket_name, fpath_not_uploaded))
        args_not_uploaded = PerformParallelUploadFileToObjectArgs(
            fpath_not_uploaded, 0, 1, fpath_not_uploaded_url,
            object_not_uploaded_url, '', empty_object, tracker_file,
            tracker_file_lock, None)

        # Already uploaded, but contents no longer match. Even though the contents
        # differ, we don't delete this since the bucket is not versioned and it
        # will be overwritten anyway.
        fpath_wrong_contents = self.CreateTempFile(file_name='foo4',
                                                   contents='4')
        fpath_wrong_contents_url = StorageUrlFromString(
            str(fpath_wrong_contents))
        object_wrong_contents_url = StorageUrlFromString(
            '%s://%s/%s' %
            (self.default_provider, bucket_name, fpath_wrong_contents))
        with open(self.CreateTempFile(contents='_')) as f_in:
            fpath_wrong_contents_md5 = CalculateB64EncodedMd5FromContents(f_in)
        mock_api.MockCreateObjectWithMetadata(apitools_messages.Object(
            bucket=bucket_name,
            name=fpath_wrong_contents,
            md5Hash=fpath_wrong_contents_md5),
                                              contents='1')

        args_wrong_contents = PerformParallelUploadFileToObjectArgs(
            fpath_wrong_contents, 0, 1, fpath_wrong_contents_url,
            object_wrong_contents_url, '', empty_object, tracker_file,
            tracker_file_lock, None)

        # Exists in tracker file, but component object no longer exists.
        fpath_remote_deleted = self.CreateTempFile(file_name='foo5',
                                                   contents='5')
        fpath_remote_deleted_url = StorageUrlFromString(
            str(fpath_remote_deleted))
        args_remote_deleted = PerformParallelUploadFileToObjectArgs(
            fpath_remote_deleted, 0, 1, fpath_remote_deleted_url, '', '',
            empty_object, tracker_file, tracker_file_lock, None)

        # Exists in tracker file and already uploaded, but no longer needed.
        fpath_no_longer_used = self.CreateTempFile(file_name='foo6',
                                                   contents='6')
        with open(fpath_no_longer_used) as f_in:
            file_md5 = CalculateB64EncodedMd5FromContents(f_in)
        mock_api.MockCreateObjectWithMetadata(apitools_messages.Object(
            bucket=bucket_name, name='foo6', md5Hash=file_md5),
                                              contents='6')

        dst_args = {
            fpath_uploaded_correctly: args_uploaded_correctly,
            fpath_not_uploaded: args_not_uploaded,
            fpath_wrong_contents: args_wrong_contents,
            fpath_remote_deleted: args_remote_deleted
        }

        existing_components = [
            ObjectFromTracker(fpath_uploaded_correctly, ''),
            ObjectFromTracker(fpath_wrong_contents, ''),
            ObjectFromTracker(fpath_remote_deleted, ''),
            ObjectFromTracker(fpath_no_longer_used, '')
        ]

        bucket_url = StorageUrlFromString('%s://%s' %
                                          (self.default_provider, bucket_name))

        (components_to_upload, uploaded_components,
         existing_objects_to_delete) = (FilterExistingComponents(
             dst_args, existing_components, bucket_url, mock_api))

        for arg in [
                args_not_uploaded, args_wrong_contents, args_remote_deleted
        ]:
            self.assertTrue(arg in components_to_upload)
        self.assertEqual(1, len(uploaded_components))
        self.assertEqual(args_uploaded_correctly.dst_url.url_string,
                         uploaded_components[0].url_string)
        self.assertEqual(1, len(existing_objects_to_delete))
        no_longer_used_url = StorageUrlFromString(
            '%s://%s/%s' %
            (self.default_provider, bucket_name, fpath_no_longer_used))
        self.assertEqual(no_longer_used_url.url_string,
                         existing_objects_to_delete[0].url_string)
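
The assertions above encode a per-component decision for non-versioned buckets. Restated as a simplified sketch (names and return values are my own, not gsutil's):

def _classify_component(local_md5, remote_md5, still_needed):
    if remote_md5 is None:
        return 'upload'  # tracked component no longer exists remotely
    if not still_needed:
        return 'delete'  # uploaded earlier but not part of this upload
    if remote_md5 == local_md5:
        return 'reuse'   # already uploaded correctly
    return 'upload'      # contents changed; will simply be overwritten (non-versioned)
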
Example #8
  def testWarnIfMvEarlyDeletionChargeApplies(self):
    """Tests that WarnIfEarlyDeletionChargeApplies warns when appropriate."""
    test_logger = logging.Logger('test')
    src_url = StorageUrlFromString('gs://bucket/object')

    # Recent nearline objects should generate a warning.
    for object_time_created in (self._PI_DAY, self._PI_DAY -
                                datetime.timedelta(days=29, hours=23)):
      recent_nearline_obj = apitools_messages.Object(
          storageClass='NEARLINE', timeCreated=object_time_created)

      with mock.patch.object(test_logger, 'warn') as mocked_warn:
        WarnIfMvEarlyDeletionChargeApplies(src_url, recent_nearline_obj,
                                           test_logger)
        mocked_warn.assert_called_with(
            'Warning: moving %s object %s may incur an early deletion '
            'charge, because the original object is less than %s days old '
            'according to the local system time.', 'nearline',
            src_url.url_string, 30)

    # Recent coldline objects should generate a warning.
    for object_time_created in (self._PI_DAY, self._PI_DAY -
                                datetime.timedelta(days=89, hours=23)):
      recent_nearline_obj = apitools_messages.Object(
          storageClass='COLDLINE', timeCreated=object_time_created)

      with mock.patch.object(test_logger, 'warn') as mocked_warn:
        WarnIfMvEarlyDeletionChargeApplies(src_url, recent_nearline_obj,
                                           test_logger)
        mocked_warn.assert_called_with(
            'Warning: moving %s object %s may incur an early deletion '
            'charge, because the original object is less than %s days old '
            'according to the local system time.', 'coldline',
            src_url.url_string, 90)

    # Recent archive objects should generate a warning.
    for object_time_created in (self._PI_DAY, self._PI_DAY -
                                datetime.timedelta(days=364, hours=23)):
      recent_archive_obj = apitools_messages.Object(
          storageClass='ARCHIVE', timeCreated=object_time_created)

      with mock.patch.object(test_logger, 'warn') as mocked_warn:
        WarnIfMvEarlyDeletionChargeApplies(src_url, recent_archive_obj,
                                           test_logger)
        mocked_warn.assert_called_with(
            'Warning: moving %s object %s may incur an early deletion '
            'charge, because the original object is less than %s days old '
            'according to the local system time.', 'archive',
            src_url.url_string, 365)

    # Sufficiently old objects should not generate a warning.
    with mock.patch.object(test_logger, 'warn') as mocked_warn:
      old_nearline_obj = apitools_messages.Object(
          storageClass='NEARLINE',
          timeCreated=self._PI_DAY - datetime.timedelta(days=30, seconds=1))
      WarnIfMvEarlyDeletionChargeApplies(src_url, old_nearline_obj, test_logger)
      mocked_warn.assert_not_called()
    with mock.patch.object(test_logger, 'warn') as mocked_warn:
      old_coldline_obj = apitools_messages.Object(
          storageClass='COLDLINE',
          timeCreated=self._PI_DAY - datetime.timedelta(days=90, seconds=1))
      WarnIfMvEarlyDeletionChargeApplies(src_url, old_coldline_obj, test_logger)
      mocked_warn.assert_not_called()
    with mock.patch.object(test_logger, 'warn') as mocked_warn:
      old_archive_obj = apitools_messages.Object(
          storageClass='ARCHIVE',
          timeCreated=self._PI_DAY - datetime.timedelta(days=365, seconds=1))
      WarnIfMvEarlyDeletionChargeApplies(src_url, old_archive_obj, test_logger)
      mocked_warn.assert_not_called()

    # Recent standard storage class object should not generate a warning.
    with mock.patch.object(test_logger, 'warn') as mocked_warn:
      not_old_enough_nearline_obj = apitools_messages.Object(
          storageClass='STANDARD', timeCreated=self._PI_DAY)
      WarnIfMvEarlyDeletionChargeApplies(src_url, not_old_enough_nearline_obj,
                                         test_logger)
      mocked_warn.assert_not_called()
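
The warning thresholds exercised by this test (30, 90, and 365 days; no charge for STANDARD) can be summarized as a standalone check. A sketch under those assumptions, not the function under test:

import datetime

_MIN_AGE_DAYS = {'NEARLINE': 30, 'COLDLINE': 90, 'ARCHIVE': 365}

def _may_incur_early_deletion_charge(storage_class, time_created, now):
    min_days = _MIN_AGE_DAYS.get(storage_class.upper())
    if min_days is None:  # e.g. STANDARD
        return False
    return now - time_created < datetime.timedelta(days=min_days)
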
Example #9
    def RunCommand(self):
        """Command entry point for the du command."""
        self.line_ending = '\n'
        self.all_versions = False
        self.produce_total = False
        self.human_readable = False
        self.summary_only = False
        self.exclude_patterns = []
        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-0':
                    self.line_ending = '\0'
                elif o == '-a':
                    self.all_versions = True
                elif o == '-c':
                    self.produce_total = True
                elif o == '-e':
                    self.exclude_patterns.append(a)
                elif o == '-h':
                    self.human_readable = True
                elif o == '-s':
                    self.summary_only = True
                elif o == '-X':
                    if a == '-':
                        f = sys.stdin
                    else:
                        f = open(a, 'r')
                    try:
                        for line in f:
                            line = line.strip()
                            if line:
                                self.exclude_patterns.append(line)
                    finally:
                        f.close()

        if not self.args:
            # Default to listing all gs buckets.
            self.args = ['gs://']

        total_bytes = 0
        got_nomatch_errors = False

        def _PrintObjectLong(blr):
            return self._PrintInfoAboutBucketListingRef(blr)

        def _PrintNothing(unused_blr=None):
            pass

        def _PrintDirectory(num_bytes, name):
            if not self.summary_only:
                self._PrintSummaryLine(num_bytes, name)

        for url_arg in self.args:
            top_level_storage_url = StorageUrlFromString(url_arg)
            if top_level_storage_url.IsFileUrl():
                raise CommandException('Only cloud URLs are supported for %s' %
                                       self.command_name)
            bucket_listing_fields = ['size']

            ls_helper = LsHelper(self.WildcardIterator,
                                 self.logger,
                                 print_object_func=_PrintObjectLong,
                                 print_dir_func=_PrintNothing,
                                 print_dir_header_func=_PrintNothing,
                                 print_dir_summary_func=_PrintDirectory,
                                 print_newline_func=_PrintNothing,
                                 all_versions=self.all_versions,
                                 should_recurse=True,
                                 exclude_patterns=self.exclude_patterns,
                                 fields=bucket_listing_fields)

            # ls_helper expands to objects and prefixes, so perform a top-level
            # expansion first.
            if top_level_storage_url.IsProvider():
                # Provider URL: use bucket wildcard to iterate over all buckets.
                top_level_iter = self.WildcardIterator(
                    '%s://*' % top_level_storage_url.scheme).IterBuckets(
                        bucket_fields=['id'])
            elif top_level_storage_url.IsBucket():
                top_level_iter = self.WildcardIterator(
                    '%s://%s' %
                    (top_level_storage_url.scheme,
                     top_level_storage_url.bucket_name)).IterBuckets(
                         bucket_fields=['id'])
            else:
                top_level_iter = [BucketListingObject(top_level_storage_url)]

            for blr in top_level_iter:
                storage_url = blr.storage_url
                if storage_url.IsBucket() and self.summary_only:
                    storage_url = StorageUrlFromString(
                        storage_url.CreatePrefixUrl(wildcard_suffix='**'))
                _, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(
                    storage_url)
                if (storage_url.IsObject() and exp_objs == 0
                        and ContainsWildcard(url_arg)
                        and not self.exclude_patterns):
                    got_nomatch_errors = True
                total_bytes += exp_bytes

                if self.summary_only:
                    self._PrintSummaryLine(exp_bytes,
                                           blr.url_string.rstrip('/'))

        if self.produce_total:
            self._PrintSummaryLine(total_bytes, 'total')

        if got_nomatch_errors:
            raise CommandException('One or more URLs matched no objects.')

        return 0
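
The -h flag asks for human-readable sizes. A hypothetical formatter for that output (gsutil's own helper may round and label differently):

def _human_readable(num_bytes):
    size = float(num_bytes)
    for unit in ('B', 'KiB', 'MiB', 'GiB', 'TiB'):
        if size < 1024 or unit == 'TiB':
            return '%.2f %s' % (size, unit)
        size /= 1024.0
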
Example #10
    def _PatchIam(self):
        self.continue_on_error = False
        self.recursion_requested = False

        patch_bindings_tuples = []

        if self.sub_opts:
            for o, a in self.sub_opts:
                if o in ['-r', '-R']:
                    self.recursion_requested = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-d':
                    patch_bindings_tuples.append(BindingStringToTuple(
                        False, a))

        patterns = []

        # N.B.: self.sub_opts stops taking in options at the first non-flagged
        # token. The rest of the tokens are sent to self.args. Thus, in order to
        # handle input of the form "-d <binding> <binding> <url>", we will have to
        # parse self.args for a mix of both bindings and CloudUrls. We are not
        # expecting to come across the -r, -f flags here.
        it = iter(self.args)
        for token in it:
            if token == '-d':
                patch_bindings_tuples.append(
                    BindingStringToTuple(False, next(it)))
            else:
                try:
                    patch_bindings_tuples.append(
                        BindingStringToTuple(True, token))
                # All following arguments are urls.
                except (ArgumentException, CommandException):
                    patterns.append(token)
                    for token in it:
                        patterns.append(token)

        # We must have some bindings to process, else this is pointless.
        if not patch_bindings_tuples:
            raise CommandException('Must specify at least one binding.')

        self.everything_set_okay = True
        threaded_wildcards = []
        for pattern in patterns:
            surl = StorageUrlFromString(pattern)
            try:
                if surl.IsBucket():
                    if self.recursion_requested:
                        surl.object = '*'
                        threaded_wildcards.append(surl.url_string)
                    else:
                        self.PatchIamHelper(surl, patch_bindings_tuples)
                else:
                    threaded_wildcards.append(surl.url_string)
            except AttributeError:
                error_msg = 'Invalid Cloud URL "%s".' % surl.object_name
                if set(surl.object_name).issubset(set('-Rrf')):
                    error_msg += (
                        ' This resource handle looks like a flag, which must appear '
                        'before all bindings. See "gsutil help iam ch" for more details.'
                    )
                raise CommandException(error_msg)

        if threaded_wildcards:
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                threaded_wildcards,
                self.recursion_requested,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations,
                bucket_listing_fields=['name'])

            seek_ahead_iterator = SeekAheadNameExpansionIterator(
                self.command_name,
                self.debug,
                self.GetSeekAheadGsutilApi(),
                threaded_wildcards,
                self.recursion_requested,
                all_versions=self.all_versions)

            # N.B.: Python2.6 support means we can't use a partial function here to
            # curry the bindings tuples into the wrapper function. We instead pass
            # the bindings along by zipping them with each name_expansion_iterator
            # result. See http://bugs.python.org/issue5228.
            serialized_bindings_tuples_it = itertools.repeat(
                [SerializeBindingsTuple(t) for t in patch_bindings_tuples])
            self.Apply(_PatchIamWrapper,
                       zip(serialized_bindings_tuples_it,
                           name_expansion_iterator),
                       _PatchIamExceptionHandler,
                       fail_on_error=not self.continue_on_error,
                       seek_ahead_iterator=seek_ahead_iterator)

            self.everything_set_okay &= not GetFailureCount() > 0

        # TODO: Add an error counter for files and objects.
        if not self.everything_set_okay:
            raise CommandException('Some IAM policies could not be patched.')
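
The argument scan above accepts bindings until the first token that fails to parse as one, then treats every remaining token as a URL. A simplified standalone version of that shape, using a scheme prefix test instead of BindingStringToTuple:

def _split_bindings_and_urls(args):
    bindings, urls = [], []
    it = iter(args)
    for token in it:
        if token.startswith(('gs://', 's3://')):
            urls.append(token)
            urls.extend(it)  # everything after the first URL is also a URL
            break
        bindings.append(token)
    return bindings, urls
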
Example #11
    def RunCommand(self):
        """Command entry point for the mb command."""
        autoclass = False
        bucket_policy_only = None
        kms_key = None
        location = None
        storage_class = None
        seconds = None
        public_access_prevention = None
        rpo = None
        json_only_flags_in_command = []
        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '--autoclass':
                    autoclass = True
                    json_only_flags_in_command.append(o)
                elif o == '-k':
                    kms_key = a
                    ValidateCMEK(kms_key)
                    json_only_flags_in_command.append(o)
                elif o == '-l':
                    location = a
                elif o == '-p':
                    # Project IDs are sent as header values when using gs and s3 XML APIs.
                    InsistAscii(
                        a, 'Invalid non-ASCII character found in project ID')
                    self.project_id = a
                elif o == '-c' or o == '-s':
                    storage_class = NormalizeStorageClass(a)
                elif o == '--retention':
                    seconds = RetentionInSeconds(a)
                elif o == '--rpo':
                    rpo = a.strip()
                    if rpo not in VALID_RPO_VALUES:
                        raise CommandException(
                            'Invalid value for --rpo. Must be one of: {},'
                            ' provided: {}'.format(VALID_RPO_VALUES_STRING, a))
                    json_only_flags_in_command.append(o)
                elif o == '-b':
                    InsistOnOrOff(
                        a, 'Only on and off values allowed for -b option')
                    bucket_policy_only = (a == 'on')
                    json_only_flags_in_command.append(o)
                elif o == '--pap':
                    public_access_prevention = a
                    json_only_flags_in_command.append(o)

        bucket_metadata = apitools_messages.Bucket(location=location,
                                                   rpo=rpo,
                                                   storageClass=storage_class)
        if autoclass:
            bucket_metadata.autoclass = apitools_messages.Bucket.AutoclassValue(
                enabled=autoclass)
        if bucket_policy_only or public_access_prevention:
            bucket_metadata.iamConfiguration = IamConfigurationValue()
            iam_config = bucket_metadata.iamConfiguration
            if bucket_policy_only:
                iam_config.bucketPolicyOnly = BucketPolicyOnlyValue()
                iam_config.bucketPolicyOnly.enabled = bucket_policy_only
            if public_access_prevention:
                iam_config.publicAccessPrevention = public_access_prevention

        if kms_key:
            encryption = apitools_messages.Bucket.EncryptionValue()
            encryption.defaultKmsKeyName = kms_key
            bucket_metadata.encryption = encryption

        for bucket_url_str in self.args:
            bucket_url = StorageUrlFromString(bucket_url_str)
            if seconds is not None:
                if bucket_url.scheme != 'gs':
                    raise CommandException(
                        'Retention policy can only be specified for '
                        'GCS buckets.')
                retention_policy = (
                    apitools_messages.Bucket.RetentionPolicyValue(
                        retentionPeriod=seconds))
                bucket_metadata.retentionPolicy = retention_policy

            if json_only_flags_in_command and self.gsutil_api.GetApiSelector(
                    bucket_url.scheme) != ApiSelector.JSON:
                raise CommandException(
                    'The {} option(s) can only be used for GCS'
                    ' Buckets with the JSON API'.format(
                        ', '.join(json_only_flags_in_command)))

            if not bucket_url.IsBucket():
                raise CommandException(
                    'The mb command requires a URL that specifies a '
                    'bucket.\n"%s" is not valid.' % bucket_url)
            if (not BUCKET_NAME_RE.match(bucket_url.bucket_name)
                    or TOO_LONG_DNS_NAME_COMP.search(bucket_url.bucket_name)):
                raise InvalidUrlError('Invalid bucket name in URL "%s"' %
                                      bucket_url.bucket_name)

            self.logger.info('Creating %s...', bucket_url)
            # Pass storage_class param only if this is a GCS bucket. (In S3 the
            # storage class is specified on the key object.)
            try:
                self.gsutil_api.CreateBucket(bucket_url.bucket_name,
                                             project_id=self.project_id,
                                             metadata=bucket_metadata,
                                             provider=bucket_url.scheme)
            except AccessDeniedException as e:
                message = e.reason
                if 'key' in message:
                    # This will print the error reason and append the following as a
                    # suggested next step:
                    #
                    # To authorize, run:
                    #   gsutil kms authorize \
                    #     -k <kms_key> \
                    #     -p <project_id>
                    message += ' To authorize, run:\n  gsutil kms authorize'
                    message += ' \\\n    -k %s' % kms_key
                    if (self.project_id):
                        message += ' \\\n    -p %s' % self.project_id
                    raise CommandException(message)
                else:
                    raise

            except BadRequestException as e:
                if (e.status == 400
                        and e.reason == 'DotfulBucketNameNotUnderTld'
                        and bucket_url.scheme == 'gs'):
                    bucket_name = bucket_url.bucket_name
                    final_comp = bucket_name[bucket_name.rfind('.') + 1:]
                    raise CommandException('\n'.join(
                        textwrap.wrap(
                            'Buckets with "." in the name must be valid DNS names. The bucket'
                            ' you are attempting to create (%s) is not a valid DNS name,'
                            ' because the final component (%s) is not currently a valid part'
                            ' of the top-level DNS tree.' %
                            (bucket_name, final_comp))))
                else:
                    raise

        return 0
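
The option loop above records which flags require the JSON API (json_only_flags_in_command) and the per-URL loop later rejects those flags when the selected API is not JSON. A minimal standalone sketch of that guard pattern follows; the flag list, the 'JSON'/'XML' strings, and the function name are illustrative stand-ins, not gsutil's real ApiSelector or option-parsing code.

# Illustrative sketch only; not gsutil's actual option handling.
JSON_ONLY_FLAGS = ('--autoclass', '--rpo', '--pap', '-b', '-k')

def check_json_only_flags(selected_api, parsed_flags):
    used = [flag for flag, _ in parsed_flags if flag in JSON_ONLY_FLAGS]
    if used and selected_api != 'JSON':
        raise ValueError(
            'The %s option(s) can only be used for GCS buckets with the '
            'JSON API' % ', '.join(used))

# Passes: no JSON-only flags were used.
check_json_only_flags('XML', [('-l', 'us-central1')])
# Would raise, because --rpo requires the JSON API:
# check_json_only_flags('XML', [('--rpo', 'ASYNC_TURBO')])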
Example #12
  def CatUrlStrings(self,
                    url_strings,
                    show_header=False,
                    start_byte=0,
                    end_byte=None,
                    cat_out_fd=None):
    """Prints each of the url strings to stdout.

    Args:
      url_strings: String iterable.
      show_header: If true, print a header per file.
      start_byte: Starting byte of the file to print, used for constructing
                  range requests.
      end_byte: Ending byte of the file to print; used for constructing range
                requests. If this is negative, start_byte is ignored and an
                end (suffix) range is sent over HTTP (such as Range: bytes=-9).
      cat_out_fd: File descriptor to which output should be written. Defaults to
                 stdout if no file descriptor is supplied.
    Returns:
      0 on success.

    Raises:
      CommandException if no URLs can be found.
    """
    printed_one = False
    # This should refer to whatever sys.stdout refers to when this method is
    # run, not when this method is defined, so we do the initialization here
    # rather than define sys.stdout as the cat_out_fd parameter's default value.
    if cat_out_fd is None:
      cat_out_fd = sys.stdout
    # We redirect stdout so that all data other than the object contents
    # goes to stderr.
    old_stdout = sys.stdout
    sys.stdout = sys.stderr
    try:
      if url_strings and url_strings[0] in ('-', 'file://-'):
        self._WriteBytesBufferedFileToFile(sys.stdin, cat_out_fd)
      else:
        for url_str in url_strings:
          did_some_work = False
          # TODO: Get only the needed fields here.
          for blr in self.command_obj.WildcardIterator(url_str).IterObjects(
              bucket_listing_fields=_CAT_BUCKET_LISTING_FIELDS):
            decryption_keywrapper = None
            if (blr.root_object and blr.root_object.customerEncryption and
                blr.root_object.customerEncryption.keySha256):
              decryption_key = FindMatchingCSEKInBotoConfig(
                  blr.root_object.customerEncryption.keySha256, config)
              if not decryption_key:
                raise EncryptionException(
                    'Missing decryption key with SHA256 hash %s. No decryption '
                    'key matches object %s' %
                    (blr.root_object.customerEncryption.keySha256,
                     blr.url_string))
              decryption_keywrapper = CryptoKeyWrapperFromKey(decryption_key)

            did_some_work = True
            if show_header:
              if printed_one:
                print()
              print('==> %s <==' % blr)
              printed_one = True
            cat_object = blr.root_object
            storage_url = StorageUrlFromString(blr.url_string)
            if storage_url.IsCloudUrl():
              compressed_encoding = ObjectIsGzipEncoded(cat_object)
              self.command_obj.gsutil_api.GetObjectMedia(
                  cat_object.bucket,
                  cat_object.name,
                  cat_out_fd,
                  compressed_encoding=compressed_encoding,
                  start_byte=start_byte,
                  end_byte=end_byte,
                  object_size=cat_object.size,
                  generation=storage_url.generation,
                  decryption_tuple=decryption_keywrapper,
                  provider=storage_url.scheme)
            else:
              with open(storage_url.object_name, 'rb') as f:
                self._WriteBytesBufferedFileToFile(f, cat_out_fd)
          if not did_some_work:
            raise CommandException(NO_URLS_MATCHED_TARGET % url_str)
    finally:
      sys.stdout = old_stdout

    return 0
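
The start_byte/end_byte semantics documented in the CatUrlStrings docstring above (a negative end_byte requests a suffix range and start_byte is ignored) can be pictured as the HTTP Range header they correspond to. The helper below is an illustrative sketch only, not the request code GetObjectMedia actually uses.

def range_header(start_byte=0, end_byte=None):
    # Illustrative only: shows how start_byte/end_byte, as documented above,
    # map onto an HTTP Range header value.
    if end_byte is not None and end_byte < 0:
        return 'bytes=%d' % end_byte            # e.g. 'bytes=-9' => last 9 bytes
    if end_byte is not None:
        return 'bytes=%d-%d' % (start_byte, end_byte)
    return 'bytes=%d-' % start_byte

print(range_header(0, -9))    # bytes=-9
print(range_header(10, 19))   # bytes=10-19
print(range_header(100))      # bytes=100-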
Example #13
  def __iter__(self):
    """Iterates over src/dst URLs and produces a _DiffToApply sequence.

    Yields:
      The _DiffToApply.
    """
    # Strip trailing slashes, if any, so we compute tail length against
    # consistent position regardless of whether trailing slashes were included
    # or not in URL.
    base_src_url_len = len(self.base_src_url.url_string.rstrip('/\\'))
    base_dst_url_len = len(self.base_dst_url.url_string.rstrip('/\\'))
    src_url_str = dst_url_str = None
    # Invariant: After each yield, the URLs in src_url_str, dst_url_str,
    # self.sorted_src_urls_it, and self.sorted_dst_urls_it are not yet
    # processed. Each time we encounter None in src_url_str or dst_url_str we
    # populate from the respective iterator, and we reset one or the other value
    # to None after yielding an action that disposes of that URL.
    while not self.sorted_src_urls_it.IsEmpty() or src_url_str is not None:
      if src_url_str is None:
        (src_url_str, src_size, src_crc32c, src_md5) = self._ParseTmpFileLine(
            self.sorted_src_urls_it.next())
        # Skip past base URL and normalize slashes so we can compare across
        # clouds/file systems (including Windows).
        src_url_str_to_check = _EncodeUrl(
            src_url_str[base_src_url_len:].replace('\\', '/'))
        dst_url_str_would_copy_to = copy_helper.ConstructDstUrl(
            self.base_src_url, StorageUrlFromString(src_url_str), True, True,
            self.base_dst_url, False, self.recursion_requested).url_string
      if self.sorted_dst_urls_it.IsEmpty():
        # We've reached end of dst URLs, so copy src to dst.
        yield _DiffToApply(
            src_url_str, dst_url_str_would_copy_to, _DiffAction.COPY)
        src_url_str = None
        continue
      if not dst_url_str:
        (dst_url_str, dst_size, dst_crc32c, dst_md5) = (
            self._ParseTmpFileLine(self.sorted_dst_urls_it.next()))
        # Skip past base URL and normalize slashes so we can compare across
        # clouds/file systems (including Windows).
        dst_url_str_to_check = _EncodeUrl(
            dst_url_str[base_dst_url_len:].replace('\\', '/'))

      if src_url_str_to_check < dst_url_str_to_check:
        # There's no dst object corresponding to src object, so copy src to dst.
        yield _DiffToApply(
            src_url_str, dst_url_str_would_copy_to, _DiffAction.COPY)
        src_url_str = None
      elif src_url_str_to_check > dst_url_str_to_check:
        # dst object without a corresponding src object, so remove dst if -d
        # option was specified.
        if self.delete_extras:
          yield _DiffToApply(None, dst_url_str, _DiffAction.REMOVE)
        dst_url_str = None
      else:
        # There is a dst object corresponding to src object, so check if objects
        # match.
        if self._ObjectsMatch(
            src_url_str, src_size, src_crc32c, src_md5,
            dst_url_str, dst_size, dst_crc32c, dst_md5):
          # Continue iterating without yielding a _DiffToApply.
          pass
        else:
          yield _DiffToApply(src_url_str, dst_url_str, _DiffAction.COPY)
        src_url_str = None
        dst_url_str = None

    # If the -d option was specified, any files/objects left in the dst
    # iteration should be removed.
    if not self.delete_extras:
      return
    if dst_url_str:
      yield _DiffToApply(None, dst_url_str, _DiffAction.REMOVE)
      dst_url_str = None
    for line in self.sorted_dst_urls_it:
      (dst_url_str, _, _, _) = self._ParseTmpFileLine(line)
      yield _DiffToApply(None, dst_url_str, _DiffAction.REMOVE)
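
The invariant comment above describes a sorted-merge diff over two sorted URL listings. The standalone sketch below illustrates the same two-pointer idea with made-up names and simple name -> checksum dicts; the real iterator compares sizes/CRC32C/MD5 and yields _DiffToApply objects instead of tuples.

# Minimal, illustrative sorted-merge diff; not the real _DiffIterator.
def diff_sorted(src_items, dst_items, delete_extras=False):
    """Yields ('copy', name) / ('remove', name) for two name -> checksum dicts."""
    src = iter(sorted(src_items.items()))
    dst = iter(sorted(dst_items.items()))
    s = next(src, None)
    d = next(dst, None)
    while s is not None:
        if d is None or s[0] < d[0]:
            yield ('copy', s[0])          # missing at destination
            s = next(src, None)
        elif s[0] > d[0]:
            if delete_extras:
                yield ('remove', d[0])    # extra at destination
            d = next(dst, None)
        else:
            if s[1] != d[1]:
                yield ('copy', s[0])      # present but contents differ
            s, d = next(src, None), next(dst, None)
    if delete_extras:
        while d is not None:
            yield ('remove', d[0])
            d = next(dst, None)

print(list(diff_sorted({'a': 1, 'b': 2}, {'b': 3, 'c': 4}, delete_extras=True)))
# [('copy', 'a'), ('copy', 'b'), ('remove', 'c')]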
Example #14
    def RunCommand(self):
        """Command entry point for the setmeta command."""
        headers = []
        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-h':
                    if 'x-goog-acl' in a or 'x-amz-acl' in a:
                        raise CommandException(
                            'gsutil setmeta no longer allows canned ACLs. Use gsutil acl '
                            'set ... to set canned ACLs.')
                    headers.append(a)

        (metadata_minus, metadata_plus) = self._ParseMetadataHeaders(headers)

        self.metadata_change = metadata_plus
        for header in metadata_minus:
            self.metadata_change[header] = ''

        if len(self.args) == 1 and not self.recursion_requested:
            url = StorageUrlFromString(self.args[0])
            if not (url.IsCloudUrl() and url.IsObject()):
                raise CommandException('URL (%s) must name an object' %
                                       self.args[0])

        # Used to track if any objects' metadata failed to be set.
        self.everything_set_okay = True

        self.preconditions = PreconditionsFromHeaders(self.headers)

        name_expansion_iterator = NameExpansionIterator(
            self.command_name,
            self.debug,
            self.logger,
            self.gsutil_api,
            self.args,
            self.recursion_requested,
            all_versions=self.all_versions,
            continue_on_error=self.parallel_operations,
            bucket_listing_fields=['generation', 'metadata', 'metageneration'])

        seek_ahead_iterator = SeekAheadNameExpansionIterator(
            self.command_name,
            self.debug,
            self.GetSeekAheadGsutilApi(),
            self.args,
            self.recursion_requested,
            all_versions=self.all_versions,
            project_id=self.project_id)

        try:
            # Perform requests in parallel (-m) mode, if requested, using
            # configured number of parallel processes and threads. Otherwise,
            # perform requests with sequential function calls in current process.
            self.Apply(_SetMetadataFuncWrapper,
                       name_expansion_iterator,
                       _SetMetadataExceptionHandler,
                       fail_on_error=True,
                       seek_ahead_iterator=seek_ahead_iterator)
        except AccessDeniedException as e:
            if e.status == 403:
                self._WarnServiceAccounts()
            raise

        if not self.everything_set_okay:
            raise CommandException(
                'Metadata for some objects could not be set.')

        return 0
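
For reference, the metadata_plus/metadata_minus split above means removals end up as empty-string values in the change dict. A tiny illustration follows; the header names are made up and this is only the shape of the result, not the real _ParseMetadataHeaders output.

# Illustrative only: example header names, not parsed from real arguments.
metadata_plus = {'Content-Type': 'text/html'}
metadata_minus = ['x-goog-meta-obsolete']

metadata_change = dict(metadata_plus)
for header in metadata_minus:
    metadata_change[header] = ''   # empty string marks the header for removal

print(metadata_change)
# {'Content-Type': 'text/html', 'x-goog-meta-obsolete': ''}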
Example #15
    def __iter__(self, bucket_listing_fields=None):
        """Iterator that gets called when iterating over the file wildcard.

    In the case where no wildcard is present, returns a single matching file
    or directory.

    Args:
      bucket_listing_fields: Iterable fields to include in listings.
          Ex. ['size']. Currently only 'size' is supported.
          If present, will populate yielded BucketListingObject.root_object
          with the file name and size.

    Raises:
      WildcardException: if invalid wildcard found.

    Yields:
      BucketListingRef of type OBJECT (for files) or PREFIX (for directories)
    """
        include_size = (bucket_listing_fields
                        and 'size' in set(bucket_listing_fields))

        wildcard = self.wildcard_url.object_name
        match = FLAT_LIST_REGEX.match(wildcard)
        if match:
            # Recursive wildcarding request ('.../**/...').
            # Example input: wildcard = '/tmp/tmp2pQJAX/**/*'
            base_dir = match.group('before')[:-1]
            remaining_wildcard = match.group('after')
            # At this point for the above example base_dir = '/tmp/tmp2pQJAX' and
            # remaining_wildcard = '/*'
            if remaining_wildcard.startswith('*'):
                raise WildcardException(
                    'Invalid wildcard with more than 2 consecutive '
                    '*s (%s)' % wildcard)
            # If there was no remaining wildcard past the recursive wildcard,
            # treat it as if it were a '*'. For example, file://tmp/** is equivalent
            # to file://tmp/**/*
            if not remaining_wildcard:
                remaining_wildcard = '*'
            # Skip slash(es).
            remaining_wildcard = remaining_wildcard.lstrip(os.sep)
            filepaths = self._IterDir(base_dir, remaining_wildcard)
        else:
            # Not a recursive wildcarding request.
            filepaths = glob.iglob(wildcard)
        for filepath in filepaths:
            expanded_url = StorageUrlFromString(filepath)
            try:
                if self.ignore_symlinks and os.path.islink(filepath):
                    if self.logger:
                        self.logger.info('Skipping symbolic link %s...',
                                         filepath)
                    continue
                # Added for HAF's specific use case of ignoring broken symbolic links.
                if (os.path.islink(filepath) and not os.path.exists(filepath)):
                    print("Bad link: " + filepath)
                    continue
                if os.path.isdir(filepath):
                    yield BucketListingPrefix(expanded_url)
                else:
                    blr_object = _GetFileObject(
                        filepath) if include_size else None
                    yield BucketListingObject(expanded_url,
                                              root_object=blr_object)
            except UnicodeEncodeError:
                raise CommandException('\n'.join(
                    textwrap.wrap(_UNICODE_EXCEPTION_TEXT % repr(filepath))))
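
The '**' branch above splits the wildcard into a base directory and a remaining pattern before walking the tree. The sketch below shows the same idea using os.walk and fnmatch; it is an illustration only and omits the directory prefixes, symlink handling, and FLAT_LIST_REGEX details of the real iterator.

import fnmatch
import os

def iter_recursive_wildcard(base_dir, remaining_pattern='*'):
    # Illustrative: yield every file under base_dir whose name matches the
    # pattern that followed the '**' component.
    for dirpath, _, filenames in os.walk(base_dir):
        for name in filenames:
            if fnmatch.fnmatch(name, remaining_pattern):
                yield os.path.join(dirpath, name)

# Roughly the equivalent of expanding 'file:///tmp/tmp2pQJAX/**/*':
# for path in iter_recursive_wildcard('/tmp/tmp2pQJAX'):
#     print(path)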
Example #16
    def _PatchIam(self):
        self.continue_on_error = False
        self.recursion_requested = False

        patch_bindings_tuples = []

        if self.sub_opts:
            for o, a in self.sub_opts:
                if o in ['-r', '-R']:
                    self.recursion_requested = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-d':
                    patch_bindings_tuples.append(BindingStringToTuple(
                        False, a))

        patterns = []

        # N.B.: self.sub_opts stops taking in options at the first non-flagged
        # token. The rest of the tokens are sent to self.args. Thus, in order to
        # handle input of the form "-d <binding> <binding> <url>", we will have to
        # parse self.args for a mix of both bindings and CloudUrls. We are not
        # expecting to come across the -r, -f flags here.
        it = iter(self.args)
        for token in it:
            if STORAGE_URI_REGEX.match(token):
                patterns.append(token)
                break
            if token == '-d':
                patch_bindings_tuples.append(
                    BindingStringToTuple(False, it.next()))
            else:
                patch_bindings_tuples.append(BindingStringToTuple(True, token))
        if not patch_bindings_tuples:
            raise CommandException('Must specify at least one binding.')

        # All following arguments are urls.
        for token in it:
            patterns.append(token)

        self.everything_set_okay = True
        self.tried_ch_on_resource_with_conditions = False
        threaded_wildcards = []
        for pattern in patterns:
            surl = StorageUrlFromString(pattern)
            try:
                if surl.IsBucket():
                    if self.recursion_requested:
                        surl.object_name = '*'
                        threaded_wildcards.append(surl.url_string)
                    else:
                        self.PatchIamHelper(surl, patch_bindings_tuples)
                else:
                    threaded_wildcards.append(surl.url_string)
            except AttributeError:
                error_msg = 'Invalid Cloud URL "%s".' % surl.object_name
                if set(surl.object_name).issubset(set('-Rrf')):
                    error_msg += (
                        ' This resource handle looks like a flag, which must appear '
                        'before all bindings. See "gsutil help iam ch" for more details.'
                    )
                raise CommandException(error_msg)

        if threaded_wildcards:
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                threaded_wildcards,
                self.recursion_requested,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations,
                bucket_listing_fields=['name'])

            seek_ahead_iterator = SeekAheadNameExpansionIterator(
                self.command_name,
                self.debug,
                self.GetSeekAheadGsutilApi(),
                threaded_wildcards,
                self.recursion_requested,
                all_versions=self.all_versions)

            serialized_bindings_tuples_it = itertools.repeat(
                [SerializeBindingsTuple(t) for t in patch_bindings_tuples])
            self.Apply(_PatchIamWrapper,
                       itertools.izip(serialized_bindings_tuples_it,
                                      name_expansion_iterator),
                       _PatchIamExceptionHandler,
                       fail_on_error=not self.continue_on_error,
                       seek_ahead_iterator=seek_ahead_iterator)

            self.everything_set_okay &= not GetFailureCount() > 0

        # TODO: Add an error counter for files and objects.
        if not self.everything_set_okay:
            msg = 'Some IAM policies could not be patched.'
            if self.tried_ch_on_resource_with_conditions:
                msg += '\n'
                msg += '\n'.join(
                    textwrap.wrap(
                        'Some resources had conditions present in their IAM policy '
                        'bindings, which is not supported by "iam ch". %s' %
                        (IAM_CH_CONDITIONS_WORKAROUND_MSG)))
            raise CommandException(msg)
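
The N.B. comment above is the reason for the manual token loop: bindings and URLs arrive mixed together in self.args. Below is a simplified, standalone sketch of that split; it ignores the in-args '-d' handling and uses a toy URL regex instead of STORAGE_URI_REGEX, so it is illustrative only.

import re

# Toy URL matcher standing in for STORAGE_URI_REGEX.
_URL_RE = re.compile(r'^[a-z0-9]+://')

def split_bindings_and_urls(args):
    bindings, urls = [], []
    it = iter(args)
    for token in it:
        if _URL_RE.match(token):
            urls.append(token)
            break
        bindings.append(token)
    urls.extend(it)  # everything after the first URL is treated as a URL
    return bindings, urls

print(split_bindings_and_urls(
    ['user:jane@example.com:objectViewer', 'gs://bucket1', 'gs://bucket2']))
# (['user:jane@example.com:objectViewer'], ['gs://bucket1', 'gs://bucket2'])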
Example #17
    def _SetIam(self):
        """Set IAM policy for given wildcards on the command line."""

        self.continue_on_error = False
        self.recursion_requested = False
        self.all_versions = False
        force_etag = False
        etag = ''
        if self.sub_opts:
            for o, arg in self.sub_opts:
                if o in ['-r', '-R']:
                    self.recursion_requested = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-a':
                    self.all_versions = True
                elif o == '-e':
                    etag = str(arg)
                    force_etag = True
                else:
                    self.RaiseInvalidArgumentException()

        file_url = self.args[0]
        patterns = self.args[1:]

        # Load the IAM policy file and raise error if the file is invalid JSON or
        # does not exist.
        try:
            with open(file_url, 'r') as fp:
                policy = json.loads(fp.read())
        except IOError:
            raise ArgumentException(
                'Specified IAM policy file "%s" does not exist.' % file_url)
        except ValueError as e:
            self.logger.debug('Invalid IAM policy file, ValueError:\n%s', e)
            raise ArgumentException('Invalid IAM policy file "%s".' % file_url)

        bindings = policy.get('bindings', [])
        if not force_etag:
            etag = policy.get('etag', '')

        policy_json = json.dumps({'bindings': bindings, 'etag': etag})
        try:
            policy = protojson.decode_message(apitools_messages.Policy,
                                              policy_json)
        except DecodeError:
            raise ArgumentException(
                'Invalid IAM policy file "%s" or etag "%s".' %
                (file_url, etag))

        self.everything_set_okay = True

        # This list of wildcard strings will be handled by NameExpansionIterator.
        threaded_wildcards = []

        for pattern in patterns:
            surl = StorageUrlFromString(pattern)
            if surl.IsBucket():
                if self.recursion_requested:
                    surl.object_name = '*'
                    threaded_wildcards.append(surl.url_string)
                else:
                    self.SetIamHelper(surl, policy)
            else:
                threaded_wildcards.append(surl.url_string)

        # N.B.: If threaded_wildcards contains a non-existent bucket
        # (e.g. ["gs://non-existent", "gs://existent"]), NameExpansionIterator
        # will raise an exception in iter.next. This halts all iteration, even
        # when -f is set. This behavior is also evident in acl set. This behavior
        # also appears for any exception that will be raised when iterating over
        # wildcard expansions (access denied if bucket cannot be listed, etc.).
        if threaded_wildcards:
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                threaded_wildcards,
                self.recursion_requested,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations,
                bucket_listing_fields=['name'])

            seek_ahead_iterator = SeekAheadNameExpansionIterator(
                self.command_name,
                self.debug,
                self.GetSeekAheadGsutilApi(),
                threaded_wildcards,
                self.recursion_requested,
                all_versions=self.all_versions)

            policy_it = itertools.repeat(protojson.encode_message(policy))
            self.Apply(_SetIamWrapper,
                       itertools.izip(policy_it, name_expansion_iterator),
                       _SetIamExceptionHandler,
                       fail_on_error=not self.continue_on_error,
                       seek_ahead_iterator=seek_ahead_iterator)

            self.everything_set_okay &= not GetFailureCount() > 0

        # TODO: Add an error counter for files and objects.
        if not self.everything_set_okay:
            raise CommandException('Some IAM policies could not be set.')
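
For reference, the file read above only needs a 'bindings' list and, optionally, an 'etag'. The example below is a made-up policy document (role, member, and etag values are illustrative), written out the way such a file might be prepared before running the command.

import json

# Illustrative policy document; values are examples, not from a real bucket.
example_policy = {
    'bindings': [
        {
            'role': 'roles/storage.objectViewer',
            'members': ['user:jane@example.com'],
        },
    ],
    'etag': 'CAE=',
}

with open('policy.json', 'w') as fp:
    json.dump(example_policy, fp, indent=2)
# Then, for example: gsutil iam set policy.json gs://your-bucket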
Example #18
    def RunCommand(self):
        """Command entry point for the rm command."""
        # self.recursion_requested is initialized in command.py (so it can be
        # checked in parent class for all commands).
        self.continue_on_error = False
        self.read_args_from_stdin = False
        self.all_versions = False
        if self.sub_opts:
            for o, unused_a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-I':
                    self.read_args_from_stdin = True
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True
                    self.all_versions = True

        if self.read_args_from_stdin:
            if self.args:
                raise CommandException(
                    'No arguments allowed with the -I flag.')
            url_strs = StdinIterator()
        else:
            if not self.args:
                raise CommandException(
                    'The rm command (without -I) expects at '
                    'least one URL.')
            url_strs = self.args

        bucket_urls_to_delete = []
        bucket_strings_to_delete = []
        if self.recursion_requested:
            bucket_fields = ['id']
            for url_str in url_strs:
                url = StorageUrlFromString(url_str)
                if url.IsBucket() or url.IsProvider():
                    for blr in self.WildcardIterator(url_str).IterBuckets(
                            bucket_fields=bucket_fields):
                        bucket_urls_to_delete.append(blr.storage_url)
                        bucket_strings_to_delete.append(url_str)

        self.preconditions = PreconditionsFromHeaders(self.headers or {})

        # Used to track if any files failed to be removed.
        self.everything_removed_okay = True

        try:
            # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                url_strs,
                self.recursion_requested,
                project_id=self.project_id,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations)

            # Perform remove requests in parallel (-m) mode, if requested, using
            # configured number of parallel processes and threads. Otherwise,
            # perform requests with sequential function calls in current process.
            self.Apply(_RemoveFuncWrapper,
                       name_expansion_iterator,
                       _RemoveExceptionHandler,
                       fail_on_error=(not self.continue_on_error))

        # Assuming the bucket has versioning enabled, URLs that don't map to
        # objects should throw an error even with all_versions, since the prior
        # round of deletes only sends objects to a history table.
        # This assumption that rm -a is only called for versioned buckets should be
        # corrected, but the fix is non-trivial.
        except CommandException as e:
            # Don't raise if there are buckets to delete -- it's valid to say:
            #   gsutil rm -r gs://some_bucket
            # if the bucket is empty.
            if not bucket_urls_to_delete and not self.continue_on_error:
                raise
            # Reset the failure count if we failed due to an empty bucket that we're
            # going to delete.
            msg = 'No URLs matched: '
            if msg in str(e):
                parts = str(e).split(msg)
                if len(parts) == 2 and parts[1] in bucket_strings_to_delete:
                    ResetFailureCount()
        except ServiceException as e:
            if not self.continue_on_error:
                raise
    def test_FilterExistingComponentsVersioned(self):
        """Tests upload with versionined parallel components."""

        mock_api = MockCloudApi()
        bucket_name = self.MakeTempName('bucket')
        mock_api.MockCreateVersionedBucket(bucket_name)

        # dst_obj_metadata used for passing content-type.
        empty_object = apitools_messages.Object()

        tracker_file = self.CreateTempFile(file_name='foo', contents='asdf')
        tracker_file_lock = CreateLock()

        # Already uploaded, contents still match, component still used.
        fpath_uploaded_correctly = self.CreateTempFile(file_name='foo1',
                                                       contents='1')
        fpath_uploaded_correctly_url = StorageUrlFromString(
            str(fpath_uploaded_correctly))
        with open(fpath_uploaded_correctly) as f_in:
            fpath_uploaded_correctly_md5 = CalculateB64EncodedMd5FromContents(
                f_in)
        object_uploaded_correctly = mock_api.MockCreateObjectWithMetadata(
            apitools_messages.Object(bucket=bucket_name,
                                     name=fpath_uploaded_correctly,
                                     md5Hash=fpath_uploaded_correctly_md5),
            contents='1')
        object_uploaded_correctly_url = StorageUrlFromString(
            '%s://%s/%s#%s' %
            (self.default_provider, bucket_name, fpath_uploaded_correctly,
             object_uploaded_correctly.generation))
        args_uploaded_correctly = PerformParallelUploadFileToObjectArgs(
            fpath_uploaded_correctly, 0, 1, fpath_uploaded_correctly_url,
            object_uploaded_correctly_url,
            object_uploaded_correctly.generation, empty_object, tracker_file,
            tracker_file_lock, None)

        # Duplicate object name in tracker file, but uploaded correctly.
        fpath_duplicate = fpath_uploaded_correctly
        fpath_duplicate_url = StorageUrlFromString(str(fpath_duplicate))
        duplicate_uploaded_correctly = mock_api.MockCreateObjectWithMetadata(
            apitools_messages.Object(bucket=bucket_name,
                                     name=fpath_duplicate,
                                     md5Hash=fpath_uploaded_correctly_md5),
            contents='1')
        duplicate_uploaded_correctly_url = StorageUrlFromString(
            '%s://%s/%s#%s' %
            (self.default_provider, bucket_name, fpath_uploaded_correctly,
             duplicate_uploaded_correctly.generation))
        args_duplicate = PerformParallelUploadFileToObjectArgs(
            fpath_duplicate, 0, 1, fpath_duplicate_url,
            duplicate_uploaded_correctly_url,
            duplicate_uploaded_correctly.generation, empty_object,
            tracker_file, tracker_file_lock, None)

        # Already uploaded, but contents no longer match.
        fpath_wrong_contents = self.CreateTempFile(file_name='foo4',
                                                   contents='4')
        fpath_wrong_contents_url = StorageUrlFromString(
            str(fpath_wrong_contents))
        with open(self.CreateTempFile(contents='_')) as f_in:
            fpath_wrong_contents_md5 = CalculateB64EncodedMd5FromContents(f_in)
        object_wrong_contents = mock_api.MockCreateObjectWithMetadata(
            apitools_messages.Object(bucket=bucket_name,
                                     name=fpath_wrong_contents,
                                     md5Hash=fpath_wrong_contents_md5),
            contents='_')
        wrong_contents_url = StorageUrlFromString(
            '%s://%s/%s#%s' %
            (self.default_provider, bucket_name, fpath_wrong_contents,
             object_wrong_contents.generation))
        args_wrong_contents = PerformParallelUploadFileToObjectArgs(
            fpath_wrong_contents, 0, 1, fpath_wrong_contents_url,
            wrong_contents_url, '', empty_object, tracker_file,
            tracker_file_lock, None)

        dst_args = {
            fpath_uploaded_correctly: args_uploaded_correctly,
            fpath_wrong_contents: args_wrong_contents
        }

        existing_components = [
            ObjectFromTracker(fpath_uploaded_correctly,
                              object_uploaded_correctly_url.generation),
            ObjectFromTracker(fpath_duplicate,
                              duplicate_uploaded_correctly_url.generation),
            ObjectFromTracker(fpath_wrong_contents,
                              wrong_contents_url.generation)
        ]

        bucket_url = StorageUrlFromString('%s://%s' %
                                          (self.default_provider, bucket_name))

        (components_to_upload, uploaded_components,
         existing_objects_to_delete) = (FilterExistingComponents(
             dst_args, existing_components, bucket_url, mock_api))

        self.assertEqual([args_wrong_contents], components_to_upload)
        self.assertEqual(args_uploaded_correctly.dst_url.url_string,
                         uploaded_components[0].url_string)
        expected_to_delete = [(args_wrong_contents.dst_url.object_name,
                               args_wrong_contents.dst_url.generation),
                              (args_duplicate.dst_url.object_name,
                               args_duplicate.dst_url.generation)]
        for uri in existing_objects_to_delete:
            self.assertTrue((uri.object_name,
                             uri.generation) in expected_to_delete)
        self.assertEqual(len(expected_to_delete),
                         len(existing_objects_to_delete))
Example #20
  def _GetDefAcl(self):
    if not StorageUrlFromString(self.args[0]).IsBucket():
      raise CommandException('URL must name a bucket for the %s command' %
                             self.command_name)
    self.GetAndPrintAcl(self.args[0])
Example #21
def PrintFullInfoAboutObject(bucket_listing_ref, incl_acl=True):
    """Print full info for given object (like what displays for gsutil ls -L).

  Args:
    bucket_listing_ref: BucketListingRef being listed.
                        Must have ref_type OBJECT and a populated root_object
                        with the desired fields.
    incl_acl: True if ACL info should be output.

  Returns:
    Tuple (number of objects, object_length)

  Raises:
    Exception: if a bug in the calling code is encountered.
  """
    url_str = bucket_listing_ref.url_string
    storage_url = StorageUrlFromString(url_str)
    obj = bucket_listing_ref.root_object

    if (obj.metadata
            and S3_DELETE_MARKER_GUID in obj.metadata.additionalProperties):
        num_bytes = 0
        num_objs = 0
        url_str += '<DeleteMarker>'
    else:
        num_bytes = obj.size
        num_objs = 1

    print('%s:' % url_str.encode(UTF8))
    if obj.updated:
        print('\tCreation time:\t\t%s' %
              obj.updated.strftime('%a, %d %b %Y %H:%M:%S GMT'))
    if obj.cacheControl:
        print('\tCache-Control:\t\t%s' % obj.cacheControl)
    if obj.contentDisposition:
        print('\tContent-Disposition:\t\t%s' % obj.contentDisposition)
    if obj.contentEncoding:
        print('\tContent-Encoding:\t\t%s' % obj.contentEncoding)
    if obj.contentLanguage:
        print('\tContent-Language:\t%s' % obj.contentLanguage)
    print('\tContent-Length:\t\t%s' % obj.size)
    print('\tContent-Type:\t\t%s' % obj.contentType)
    if obj.componentCount:
        print('\tComponent-Count:\t%d' % obj.componentCount)
    marker_props = {}
    if obj.metadata and obj.metadata.additionalProperties:
        non_marker_props = []
        for add_prop in obj.metadata.additionalProperties:
            if add_prop.key not in S3_MARKER_GUIDS:
                non_marker_props.append(add_prop)
            else:
                marker_props[add_prop.key] = add_prop.value
        if non_marker_props:
            print('\tMetadata:')
            for ap in non_marker_props:
                meta_string = '\t\t%s:\t\t%s' % (ap.key, ap.value)
                print(meta_string.encode(UTF8))
    if obj.crc32c:
        print('\tHash (crc32c):\t\t%s' % obj.crc32c)
    if obj.md5Hash:
        print('\tHash (md5):\t\t%s' % obj.md5Hash)
    print('\tETag:\t\t\t%s' % obj.etag.strip('"\''))
    if obj.generation:
        generation_str = GenerationFromUrlAndString(storage_url,
                                                    obj.generation)
        print('\tGeneration:\t\t%s' % generation_str)
    if obj.metageneration:
        print('\tMetageneration:\t\t%s' % obj.metageneration)
    if incl_acl:
        # JSON API won't return ACLs as part of the response unless we have
        # full control scope.
        if obj.acl:
            print('\tACL:\t\t%s' % AclTranslation.JsonFromMessage(obj.acl))
        elif S3_ACL_MARKER_GUID in marker_props:
            print('\tACL:\t\t%s' % marker_props[S3_ACL_MARKER_GUID])
        else:
            print(
                '\tACL:\t\t\tACCESS DENIED. Note: you need OWNER '
                'permission\n\t\t\t\ton the object to read its ACL.')

    return (num_objs, num_bytes)
class TestDaisyChainWrapper(testcase.GsUtilUnitTestCase):
  """Unit tests for the DaisyChainWrapper class."""

  _temp_test_file = None
  _dummy_url = StorageUrlFromString('gs://bucket/object')

  def setUp(self):
    super(TestDaisyChainWrapper, self).setUp()
    self.test_data_file = self._GetTestFile()
    self.test_data_file_len = os.path.getsize(self.test_data_file)

  def _GetTestFile(self):
    contents = pkgutil.get_data('gslib', 'tests/test_data/%s' % _TEST_FILE)
    if not self._temp_test_file:
      # Write to a temp file because pkgutil doesn't expose a stream interface.
      self._temp_test_file = self.CreateTempFile(
          file_name=_TEST_FILE, contents=contents)
    return self._temp_test_file

  class MockDownloadCloudApi(gslib.cloud_api.CloudApi):
    """Mock CloudApi that implements GetObjectMedia for testing."""

    def __init__(self, write_values):
      """Initialize the mock that will be used by the download thread.

      Args:
        write_values: List of values that will be used for calls to write(),
            in order, by the download thread. An Exception class may be part of
            the list; if so, the Exception will be raised after previous
            values are consumed.
      """
      self._write_values = write_values
      self.get_calls = 0

    def GetObjectMedia(self, unused_bucket_name, unused_object_name,
                       download_stream, start_byte=0, end_byte=None,
                       **kwargs):
      """Writes self._write_values to the download_stream."""
      # Writes from start_byte up to, but not including end_byte (if not None).
      # Does not slice values;
      # self._write_values must line up with start/end_byte.
      self.get_calls += 1
      bytes_read = 0
      for write_value in self._write_values:
        if bytes_read < start_byte:
          bytes_read += len(write_value)
          continue
        if end_byte and bytes_read >= end_byte:
          break
        if isinstance(write_value, Exception):
          raise write_value
        download_stream.write(write_value)
        bytes_read += len(write_value)

  def _WriteFromWrapperToFile(self, daisy_chain_wrapper, file_path):
    """Writes all contents from the DaisyChainWrapper to the named file."""
    with open(file_path, 'wb') as upload_stream:
      while True:
        data = daisy_chain_wrapper.read(TRANSFER_BUFFER_SIZE)
        if not data:
          break
        upload_stream.write(data)

  def testDownloadSingleChunk(self):
    """Tests a single call to GetObjectMedia."""
    write_values = []
    with open(self.test_data_file, 'rb') as stream:
      while True:
        data = stream.read(TRANSFER_BUFFER_SIZE)
        if not data:
          break
        write_values.append(data)
    upload_file = self.CreateTempFile()
    # Test for a single call even if the chunk size is larger than the data.
    for chunk_size in (self.test_data_file_len, self.test_data_file_len + 1):
      mock_api = self.MockDownloadCloudApi(write_values)
      daisy_chain_wrapper = DaisyChainWrapper(
          self._dummy_url, self.test_data_file_len, mock_api,
          download_chunk_size=chunk_size)
      self._WriteFromWrapperToFile(daisy_chain_wrapper, upload_file)
      # Since the chunk size is >= the file size, only a single GetObjectMedia
      # call should be made.
      self.assertEquals(mock_api.get_calls, 1)
      with open(upload_file, 'rb') as upload_stream:
        with open(self.test_data_file, 'rb') as download_stream:
          self.assertEqual(upload_stream.read(), download_stream.read())

  def testDownloadMultiChunk(self):
    """Tests multiple calls to GetObjectMedia."""
    upload_file = self.CreateTempFile()
    write_values = []
    with open(self.test_data_file, 'rb') as stream:
      while True:
        data = stream.read(TRANSFER_BUFFER_SIZE)
        if not data:
          break
        write_values.append(data)
    mock_api = self.MockDownloadCloudApi(write_values)
    daisy_chain_wrapper = DaisyChainWrapper(
        self._dummy_url, self.test_data_file_len, mock_api,
        download_chunk_size=TRANSFER_BUFFER_SIZE)
    self._WriteFromWrapperToFile(daisy_chain_wrapper, upload_file)
    num_expected_calls = self.test_data_file_len // TRANSFER_BUFFER_SIZE
    if self.test_data_file_len % TRANSFER_BUFFER_SIZE:
      num_expected_calls += 1
    # Since the chunk size is < the file size, multiple calls to GetObjectMedia
    # should be made.
    self.assertEqual(mock_api.get_calls, num_expected_calls)
    with open(upload_file, 'rb') as upload_stream:
      with open(self.test_data_file, 'rb') as download_stream:
        self.assertEqual(upload_stream.read(), download_stream.read())

  def testDownloadWithZeroWrites(self):
    """Tests 0-byte writes to the download stream from GetObjectMedia."""
    write_values = []
    with open(self.test_data_file, 'rb') as stream:
      while True:
        write_values.append(b'')
        data = stream.read(TRANSFER_BUFFER_SIZE)
        write_values.append(b'')
        if not data:
          break
        write_values.append(data)
    upload_file = self.CreateTempFile()
    mock_api = self.MockDownloadCloudApi(write_values)
    daisy_chain_wrapper = DaisyChainWrapper(
        self._dummy_url, self.test_data_file_len, mock_api,
        download_chunk_size=self.test_data_file_len)
    self._WriteFromWrapperToFile(daisy_chain_wrapper, upload_file)
    self.assertEquals(mock_api.get_calls, 1)
    with open(upload_file, 'rb') as upload_stream:
      with open(self.test_data_file, 'rb') as download_stream:
        self.assertEqual(upload_stream.read(), download_stream.read())

  def testDownloadWithPartialWrite(self):
    """Tests unaligned writes to the download stream from GetObjectMedia."""
    with open(self.test_data_file, 'rb') as stream:
      chunk = stream.read(TRANSFER_BUFFER_SIZE)
    one_byte = chunk[0:1]
    chunk_minus_one_byte = chunk[1:TRANSFER_BUFFER_SIZE]
    half_chunk = chunk[0:TRANSFER_BUFFER_SIZE // 2]

    write_values_dict = {
        'First byte first chunk unaligned':
            (one_byte, chunk_minus_one_byte, chunk, chunk),
        'Last byte first chunk unaligned':
            (chunk_minus_one_byte, chunk, chunk),
        'First byte second chunk unaligned':
            (chunk, one_byte, chunk_minus_one_byte, chunk),
        'Last byte second chunk unaligned':
            (chunk, chunk_minus_one_byte, one_byte, chunk),
        'First byte final chunk unaligned':
            (chunk, chunk, one_byte, chunk_minus_one_byte),
        'Last byte final chunk unaligned':
            (chunk, chunk, chunk_minus_one_byte, one_byte),
        'Half chunks':
            (half_chunk, half_chunk, half_chunk),
        'Many unaligned':
            (one_byte, half_chunk, one_byte, half_chunk, chunk,
             chunk_minus_one_byte, chunk, one_byte, half_chunk, one_byte)
        }
    upload_file = self.CreateTempFile()
    for case_name, write_values in write_values_dict.items():
      expected_contents = b''
      for write_value in write_values:
        expected_contents += write_value
      mock_api = self.MockDownloadCloudApi(write_values)
      daisy_chain_wrapper = DaisyChainWrapper(
          self._dummy_url, len(expected_contents), mock_api,
          download_chunk_size=self.test_data_file_len)
      self._WriteFromWrapperToFile(daisy_chain_wrapper, upload_file)
      with open(upload_file, 'rb') as upload_stream:
        self.assertEqual(upload_stream.read(), expected_contents,
                         'Uploaded file contents for case %s did not match'
                         % case_name)

  def testSeekAndReturn(self):
    """Tests seeking to the end of the wrapper (simulates getting size)."""
    write_values = []
    with open(self.test_data_file, 'rb') as stream:
      while True:
        data = stream.read(TRANSFER_BUFFER_SIZE)
        if not data:
          break
        write_values.append(data)
    upload_file = self.CreateTempFile()
    mock_api = self.MockDownloadCloudApi(write_values)
    daisy_chain_wrapper = DaisyChainWrapper(
        self._dummy_url, self.test_data_file_len, mock_api,
        download_chunk_size=self.test_data_file_len)
    with open(upload_file, 'wb') as upload_stream:
      current_position = 0
      daisy_chain_wrapper.seek(0, whence=os.SEEK_END)
      daisy_chain_wrapper.seek(current_position)
      while True:
        data = daisy_chain_wrapper.read(TRANSFER_BUFFER_SIZE)
        current_position += len(data)
        daisy_chain_wrapper.seek(0, whence=os.SEEK_END)
        daisy_chain_wrapper.seek(current_position)
        if not data:
          break
        upload_stream.write(data)
    self.assertEquals(mock_api.get_calls, 1)
    with open(upload_file, 'rb') as upload_stream:
      with open(self.test_data_file, 'rb') as download_stream:
        self.assertEqual(upload_stream.read(), download_stream.read())

  def testRestartDownloadThread(self):
    """Tests seek to non-stored position; this restarts the download thread."""
    write_values = []
    with open(self.test_data_file, 'rb') as stream:
      while True:
        data = stream.read(TRANSFER_BUFFER_SIZE)
        if not data:
          break
        write_values.append(data)
    upload_file = self.CreateTempFile()
    mock_api = self.MockDownloadCloudApi(write_values)
    daisy_chain_wrapper = DaisyChainWrapper(
        self._dummy_url, self.test_data_file_len, mock_api,
        download_chunk_size=self.test_data_file_len)
    daisy_chain_wrapper.read(TRANSFER_BUFFER_SIZE)
    daisy_chain_wrapper.read(TRANSFER_BUFFER_SIZE)
    daisy_chain_wrapper.seek(0)
    self._WriteFromWrapperToFile(daisy_chain_wrapper, upload_file)
    self.assertEquals(mock_api.get_calls, 2)
    with open(upload_file, 'rb') as upload_stream:
      with open(self.test_data_file, 'rb') as download_stream:
        self.assertEqual(upload_stream.read(), download_stream.read())

  def testDownloadThreadException(self):
    """Tests that an exception is propagated via the upload thread."""

    class DownloadException(Exception):
      pass

    write_values = [b'a', b'b',
                    DownloadException('Download thread forces failure')]
    upload_file = self.CreateTempFile()
    mock_api = self.MockDownloadCloudApi(write_values)
    daisy_chain_wrapper = DaisyChainWrapper(
        self._dummy_url, self.test_data_file_len, mock_api,
        download_chunk_size=self.test_data_file_len)
    try:
      self._WriteFromWrapperToFile(daisy_chain_wrapper, upload_file)
      self.fail('Expected exception')
    except DownloadException as e:
      self.assertIn('Download thread forces failure', str(e))
  def setUp(self):
    super(TestAcl, self).setUp()
    self.sample_uri = self.CreateBucket()
    self.sample_url = StorageUrlFromString(str(self.sample_uri))
    self.logger = CreateGsutilLogger('acl')
Example #24
    def RunCommand(self):
        """Command entry point for the mb command."""
        bucket_policy_only = None
        location = None
        storage_class = None
        seconds = None
        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-l':
                    location = a
                elif o == '-p':
                    # Project IDs are sent as header values when using gs and s3 XML APIs.
                    InsistAscii(
                        a, 'Invalid non-ASCII character found in project ID')
                    self.project_id = a
                elif o == '-c' or o == '-s':
                    storage_class = NormalizeStorageClass(a)
                elif o == '--retention':
                    seconds = RetentionInSeconds(a)
                elif o == '-b':
                    if self.gsutil_api.GetApiSelector(
                            'gs') != ApiSelector.JSON:
                        raise CommandException(
                            'The -b <on|off> option '
                            'can only be used with the JSON API')
                    InsistOnOrOff(
                        a, 'Only on and off values allowed for -b option')
                    bucket_policy_only = (a == 'on')

        bucket_metadata = apitools_messages.Bucket(location=location,
                                                   storageClass=storage_class)
        if bucket_policy_only:
            bucket_metadata.iamConfiguration = IamConfigurationValue()
            iam_config = bucket_metadata.iamConfiguration
            iam_config.bucketPolicyOnly = BucketPolicyOnlyValue()
            iam_config.bucketPolicyOnly.enabled = bucket_policy_only

        for bucket_url_str in self.args:
            bucket_url = StorageUrlFromString(bucket_url_str)
            if seconds is not None:
                if bucket_url.scheme != 'gs':
                    raise CommandException(
                        'Retention policy can only be specified for '
                        'GCS buckets.')
                retention_policy = (
                    apitools_messages.Bucket.RetentionPolicyValue(
                        retentionPeriod=seconds))
                bucket_metadata.retentionPolicy = retention_policy

            if not bucket_url.IsBucket():
                raise CommandException(
                    'The mb command requires a URL that specifies a '
                    'bucket.\n"%s" is not valid.' % bucket_url)
            if (not BUCKET_NAME_RE.match(bucket_url.bucket_name)
                    or TOO_LONG_DNS_NAME_COMP.search(bucket_url.bucket_name)):
                raise InvalidUrlError('Invalid bucket name in URL "%s"' %
                                      bucket_url.bucket_name)

            self.logger.info('Creating %s...', bucket_url)
            # Pass storage_class param only if this is a GCS bucket. (In S3 the
            # storage class is specified on the key object.)
            try:
                self.gsutil_api.CreateBucket(bucket_url.bucket_name,
                                             project_id=self.project_id,
                                             metadata=bucket_metadata,
                                             provider=bucket_url.scheme)
            except BadRequestException as e:
                if (e.status == 400
                        and e.reason == 'DotfulBucketNameNotUnderTld'
                        and bucket_url.scheme == 'gs'):
                    bucket_name = bucket_url.bucket_name
                    final_comp = bucket_name[bucket_name.rfind('.') + 1:]
                    raise CommandException('\n'.join(
                        textwrap.wrap(
                            'Buckets with "." in the name must be valid DNS names. The bucket'
                            ' you are attempting to create (%s) is not a valid DNS name,'
                            ' because the final component (%s) is not currently a valid part'
                            ' of the top-level DNS tree.' %
                            (bucket_name, final_comp))))
                else:
                    raise

        return 0
Example #25
class RmCommand(Command):
    """Implementation of gsutil rm command."""

    # Command specification. See base class for documentation.
    command_spec = Command.CreateCommandSpec(
        'rm',
        command_name_aliases=['del', 'delete', 'remove'],
        usage_synopsis=_SYNOPSIS,
        min_args=0,
        max_args=NO_MAX,
        supported_sub_args='afIrR',
        file_url_ok=False,
        provider_url_ok=False,
        urls_start_arg=0,
        gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
        gs_default_api=ApiSelector.JSON,
        argparse_arguments=[CommandArgument.MakeZeroOrMoreCloudURLsArgument()])
    # Help specification. See help_provider.py for documentation.
    help_spec = Command.HelpSpec(
        help_name='rm',
        help_name_aliases=['del', 'delete', 'remove'],
        help_type='command_help',
        help_one_line_summary='Remove objects',
        help_text=_DETAILED_HELP_TEXT,
        subcommand_help_text={},
    )

    def RunCommand(self):
        """Command entry point for the rm command."""
        # self.recursion_requested is initialized in command.py (so it can be
        # checked in parent class for all commands).
        self.continue_on_error = self.parallel_operations
        self.read_args_from_stdin = False
        self.all_versions = False
        if self.sub_opts:
            for o, unused_a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-I':
                    self.read_args_from_stdin = True
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True
                    self.all_versions = True

        if self.read_args_from_stdin:
            if self.args:
                raise CommandException(
                    'No arguments allowed with the -I flag.')
            url_strs = StdinIterator()
        else:
            if not self.args:
                raise CommandException(
                    'The rm command (without -I) expects at '
                    'least one URL.')
            url_strs = self.args

        # Tracks number of object deletes that failed.
        self.op_failure_count = 0

        # Tracks if any buckets were missing.
        self.bucket_not_found_count = 0

        # Tracks buckets that are slated for recursive deletion.
        bucket_urls_to_delete = []
        self.bucket_strings_to_delete = []

        if self.recursion_requested:
            bucket_fields = ['id']
            for url_str in url_strs:
                url = StorageUrlFromString(url_str)
                if url.IsBucket() or url.IsProvider():
                    for blr in self.WildcardIterator(url_str).IterBuckets(
                            bucket_fields=bucket_fields):
                        bucket_urls_to_delete.append(blr.storage_url)
                        self.bucket_strings_to_delete.append(url_str)

        self.preconditions = PreconditionsFromHeaders(self.headers or {})

        try:
            # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                url_strs,
                self.recursion_requested,
                project_id=self.project_id,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations)

            seek_ahead_iterator = None
            # Cannot seek ahead with stdin args, since we can only iterate them
            # once without buffering in memory.
            if not self.read_args_from_stdin:
                seek_ahead_iterator = SeekAheadNameExpansionIterator(
                    self.command_name,
                    self.debug,
                    self.GetSeekAheadGsutilApi(),
                    url_strs,
                    self.recursion_requested,
                    all_versions=self.all_versions,
                    project_id=self.project_id)

            # Perform remove requests in parallel (-m) mode, if requested, using
            # configured number of parallel processes and threads. Otherwise,
            # perform requests with sequential function calls in current process.
            self.Apply(
                _RemoveFuncWrapper,
                name_expansion_iterator,
                _RemoveExceptionHandler,
                fail_on_error=(not self.continue_on_error),
                shared_attrs=['op_failure_count', 'bucket_not_found_count'],
                seek_ahead_iterator=seek_ahead_iterator)

        # Assuming the bucket has versioning enabled, URLs that don't map to
        # objects should throw an error even with all_versions, since the prior
        # round of deletes only sends objects to a history table.
        # This assumption that rm -a is only called for versioned buckets should be
        # corrected, but the fix is non-trivial.
        except CommandException as e:
            # Don't raise if there are buckets to delete -- it's valid to say:
            #   gsutil rm -r gs://some_bucket
            # if the bucket is empty.
            if _ExceptionMatchesBucketToDelete(self.bucket_strings_to_delete,
                                               e):
                DecrementFailureCount()
            else:
                raise
        except ServiceException as e:
            if not self.continue_on_error:
                raise

        if self.bucket_not_found_count:
            raise CommandException(
                'Encountered non-existent bucket during listing')

        if self.op_failure_count and not self.continue_on_error:
            raise CommandException('Some files could not be removed.')

        # If this was a gsutil rm -r command covering any bucket subdirs,
        # remove any dir_$folder$ objects (which are created by various web UI
        # tools to simulate folders).
        if self.recursion_requested:
            folder_object_wildcards = []
            for url_str in url_strs:
                url = StorageUrlFromString(url_str)
                if url.IsObject():
                    folder_object_wildcards.append('%s**_$folder$' % url_str)
            if folder_object_wildcards:
                self.continue_on_error = True
                try:
                    name_expansion_iterator = NameExpansionIterator(
                        self.command_name,
                        self.debug,
                        self.logger,
                        self.gsutil_api,
                        folder_object_wildcards,
                        self.recursion_requested,
                        project_id=self.project_id,
                        all_versions=self.all_versions)
                    # When we're removing folder objects, always continue on error
                    self.Apply(_RemoveFuncWrapper,
                               name_expansion_iterator,
                               _RemoveFoldersExceptionHandler,
                               fail_on_error=False)
                except CommandException as e:
                    # Ignore exception from name expansion due to an absent folder file.
                    if not e.reason.startswith(NO_URLS_MATCHED_GENERIC):
                        raise

        # Now that all data has been deleted, delete any bucket URLs.
        for url in bucket_urls_to_delete:
            self.logger.info('Removing %s...', url)

            @Retry(NotEmptyException, tries=3, timeout_secs=1)
            def BucketDeleteWithRetry():
                self.gsutil_api.DeleteBucket(url.bucket_name,
                                             provider=url.scheme)

            BucketDeleteWithRetry()

        if self.op_failure_count:
            plural_str = 's' if self.op_failure_count > 1 else ''
            raise CommandException(
                '%d file%s/object%s could not be removed.' %
                (self.op_failure_count, plural_str, plural_str))

        return 0
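
The BucketDeleteWithRetry helper above leans on gsutil's @Retry decorator to absorb NotEmptyException while earlier object deletes settle. Below is a minimal sketch of that retry pattern, not gsutil's actual decorator (which lives in gslib's retry utilities and may add backoff); the names retry/delete_bucket are illustrative only.

import functools
import time


def retry(exc_type, tries=3, timeout_secs=1):
    """Minimal retry decorator: re-invoke fn while exc_type is raised."""
    def decorator(fn):
        @functools.wraps(fn)
        def wrapper(*args, **kwargs):
            for attempt in range(tries):
                try:
                    return fn(*args, **kwargs)
                except exc_type:
                    if attempt == tries - 1:
                        raise  # Out of attempts; surface the error.
                    time.sleep(timeout_secs)
        return wrapper
    return decorator


# Hypothetical usage mirroring BucketDeleteWithRetry above:
#
#   @retry(NotEmptyException, tries=3, timeout_secs=1)
#   def delete_bucket():
#       gsutil_api.DeleteBucket(url.bucket_name, provider=url.scheme)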
Beispiel #26
0
    def RunCommand(self):
        """Command entry point for the compose command."""
        target_url_str = self.args[-1]
        self.args = self.args[:-1]
        target_url = StorageUrlFromString(target_url_str)
        self.CheckProvider(target_url)
        if target_url.HasGeneration():
            raise CommandException(
                'A version-specific URL (%s) cannot be '
                'the destination for gsutil compose - abort.' % target_url)

        dst_obj_metadata = apitools_messages.Object(
            name=target_url.object_name, bucket=target_url.bucket_name)

        components = []
        # Remember the first source object so we can get its content type.
        first_src_url = None
        for src_url_str in self.args:
            if ContainsWildcard(src_url_str):
                src_url_iter = self.WildcardIterator(src_url_str).IterObjects()
            else:
                src_url_iter = [
                    BucketListingObject(StorageUrlFromString(src_url_str))
                ]
            for blr in src_url_iter:
                src_url = blr.storage_url
                self.CheckProvider(src_url)

                if src_url.bucket_name != target_url.bucket_name:
                    raise CommandException(
                        'GCS does not support inter-bucket composing.')

                if not first_src_url:
                    first_src_url = src_url
                src_obj_metadata = (apitools_messages.ComposeRequest.
                                    SourceObjectsValueListEntry(
                                        name=src_url.object_name))
                if src_url.HasGeneration():
                    src_obj_metadata.generation = int(src_url.generation)
                components.append(src_obj_metadata)
                # Avoid expanding too many components, and sanity check each name
                # expansion result.
                if len(components) > MAX_COMPOSE_ARITY:
                    raise CommandException(
                        '"compose" called with too many component '
                        'objects. Limit is %d.' % MAX_COMPOSE_ARITY)

        if not components:
            raise CommandException(
                '"compose" requires at least 1 component object.')

        dst_obj_metadata.contentType = self.gsutil_api.GetObjectMetadata(
            first_src_url.bucket_name,
            first_src_url.object_name,
            provider=first_src_url.scheme,
            fields=['contentType']).contentType

        preconditions = PreconditionsFromHeaders(self.headers or {})

        self.logger.info('Composing %s from %d component object(s).',
                         target_url, len(components))
        self.gsutil_api.ComposeObject(
            components,
            dst_obj_metadata,
            preconditions=preconditions,
            provider=target_url.scheme,
            encryption_tuple=GetEncryptionKeyWrapper(config))
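
The compose command enforces two client-side constraints before calling the API: at most MAX_COMPOSE_ARITY components, and all components in the destination bucket. A standalone sketch of that validation follows, assuming a 32-component per-request limit (the documented GCS compose maximum) and representing sources as plain (bucket, object_name) tuples instead of StorageUrl objects.

# Assumed value of MAX_COMPOSE_ARITY; GCS documents 32 components per request.
MAX_COMPOSE_ARITY = 32


def validate_compose_sources(target_bucket, sources):
    """Raise ValueError unless sources are composable into target_bucket."""
    if not sources:
        raise ValueError('compose requires at least 1 component object.')
    if len(sources) > MAX_COMPOSE_ARITY:
        raise ValueError('compose called with too many component objects. '
                         'Limit is %d.' % MAX_COMPOSE_ARITY)
    for bucket, name in sources:
        if bucket != target_bucket:
            raise ValueError('GCS does not support inter-bucket composing '
                             '(%s/%s is outside %s).' %
                             (bucket, name, target_bucket))


# Example: both components live in the target bucket, so this passes.
validate_compose_sources('my-bucket', [('my-bucket', 'part-0'),
                                       ('my-bucket', 'part-1')])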
Beispiel #27
0
    def RunCommand(self):
        """Command entry point for the ls command."""
        got_nomatch_errors = False
        got_bucket_nomatch_errors = False
        listing_style = ListingStyle.SHORT
        get_bucket_info = False
        self.recursion_requested = False
        self.all_versions = False
        self.include_etag = False
        self.human_readable = False
        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-e':
                    self.include_etag = True
                elif o == '-b':
                    get_bucket_info = True
                elif o == '-h':
                    self.human_readable = True
                elif o == '-l':
                    listing_style = ListingStyle.LONG
                elif o == '-L':
                    listing_style = ListingStyle.LONG_LONG
                elif o == '-p':
                    self.project_id = a
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True

        if not self.args:
            # default to listing all gs buckets
            self.args = ['gs://']

        total_objs = 0
        total_bytes = 0

        def MaybePrintBucketHeader(blr):
            if len(self.args) > 1:
                print('%s:' % blr.url_string.encode(UTF8))

        print_bucket_header = MaybePrintBucketHeader

        for url_str in self.args:
            storage_url = StorageUrlFromString(url_str)
            if storage_url.IsFileUrl():
                raise CommandException('Only cloud URLs are supported for %s' %
                                       self.command_name)
            bucket_fields = None
            if (listing_style == ListingStyle.SHORT
                    or listing_style == ListingStyle.LONG):
                bucket_fields = ['id']
            elif listing_style == ListingStyle.LONG_LONG:
                bucket_fields = [
                    'location', 'storageClass', 'versioning', 'acl',
                    'defaultObjectAcl', 'website', 'logging', 'cors',
                    'lifecycle'
                ]
            if storage_url.IsProvider():
                # Provider URL: use bucket wildcard to list buckets.
                for blr in self.WildcardIterator(
                        '%s://*' % storage_url.scheme).IterBuckets(
                            bucket_fields=bucket_fields):
                    self._PrintBucketInfo(blr, listing_style)
            elif storage_url.IsBucket() and get_bucket_info:
                # ls -b bucket listing request: List info about bucket(s).
                total_buckets = 0
                for blr in self.WildcardIterator(url_str).IterBuckets(
                        bucket_fields=bucket_fields):
                    if not ContainsWildcard(url_str) and not blr.root_object:
                        # Iterator does not make an HTTP call for non-wildcarded
                        # listings with fields=='id'. Ensure the bucket exists by calling
                        # GetBucket.
                        self.gsutil_api.GetBucket(blr.storage_url.bucket_name,
                                                  fields=['id'],
                                                  provider=storage_url.scheme)
                    self._PrintBucketInfo(blr, listing_style)
                    total_buckets += 1
                if not ContainsWildcard(url_str) and not total_buckets:
                    got_bucket_nomatch_errors = True
            else:
                # URL names a bucket, object, or object subdir ->
                # list matching object(s) / subdirs.
                def _PrintPrefixLong(blr):
                    print('%-33s%s' % ('', blr.url_string.encode(UTF8)))

                if listing_style == ListingStyle.SHORT:
                    # ls helper by default readies us for a short listing.
                    ls_helper = LsHelper(
                        self.WildcardIterator,
                        self.logger,
                        all_versions=self.all_versions,
                        print_bucket_header_func=print_bucket_header,
                        should_recurse=self.recursion_requested)
                elif listing_style == ListingStyle.LONG:
                    bucket_listing_fields = ['name', 'updated', 'size']
                    if self.all_versions:
                        bucket_listing_fields.extend(
                            ['generation', 'metageneration'])
                    if self.include_etag:
                        bucket_listing_fields.append('etag')

                    ls_helper = LsHelper(
                        self.WildcardIterator,
                        self.logger,
                        print_object_func=self._PrintLongListing,
                        print_dir_func=_PrintPrefixLong,
                        print_bucket_header_func=print_bucket_header,
                        all_versions=self.all_versions,
                        should_recurse=self.recursion_requested,
                        fields=bucket_listing_fields)

                elif listing_style == ListingStyle.LONG_LONG:
                    # List all fields
                    bucket_listing_fields = None
                    ls_helper = LsHelper(
                        self.WildcardIterator,
                        self.logger,
                        print_object_func=PrintFullInfoAboutObject,
                        print_dir_func=_PrintPrefixLong,
                        print_bucket_header_func=print_bucket_header,
                        all_versions=self.all_versions,
                        should_recurse=self.recursion_requested,
                        fields=bucket_listing_fields)
                else:
                    raise CommandException('Unknown listing style: %s' %
                                           listing_style)

                exp_dirs, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(
                    storage_url)
                if storage_url.IsObject() and exp_objs == 0 and exp_dirs == 0:
                    got_nomatch_errors = True
                total_bytes += exp_bytes
                total_objs += exp_objs

        if total_objs and listing_style != ListingStyle.SHORT:
            print('TOTAL: %d objects, %d bytes (%s)' %
                  (total_objs, total_bytes,
                   MakeHumanReadable(float(total_bytes))))
        if got_nomatch_errors:
            raise CommandException('One or more URLs matched no objects.')
        if got_bucket_nomatch_errors:
            raise NotFoundException(
                'One or more bucket URLs matched no buckets.')

        return 0
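
The TOTAL line above formats the byte count with MakeHumanReadable. The following is a rough sketch of that kind of formatting, assuming binary (1024-based) units as in gsutil's ls output; the real helper's exact rounding and unit labels may differ, and make_human_readable here is an illustrative stand-in.

def make_human_readable(num_bytes):
    """Format a byte count with binary units, e.g. 1536 -> '1.50 KiB'."""
    value = float(num_bytes)
    for suffix in ('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB'):
        if value < 1024.0 or suffix == 'PiB':
            return '%.2f %s' % (value, suffix)
        value /= 1024.0


# Example: mirrors the shape of the TOTAL line printed above.
total_objs, total_bytes = 3, 1536
print('TOTAL: %d objects, %d bytes (%s)' %
      (total_objs, total_bytes, make_human_readable(total_bytes)))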
Beispiel #28
0
class RmCommand(Command):
    """Implementation of gsutil rm command."""

    # Command specification. See base class for documentation.
    command_spec = Command.CreateCommandSpec(
        'rm',
        command_name_aliases=['del', 'delete', 'remove'],
        min_args=1,
        max_args=NO_MAX,
        supported_sub_args='afrR',
        file_url_ok=False,
        provider_url_ok=False,
        urls_start_arg=0,
        gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
        gs_default_api=ApiSelector.JSON,
    )
    # Help specification. See help_provider.py for documentation.
    help_spec = Command.HelpSpec(
        help_name='rm',
        help_name_aliases=['del', 'delete', 'remove'],
        help_type='command_help',
        help_one_line_summary='Remove objects',
        help_text=_DETAILED_HELP_TEXT,
        subcommand_help_text={},
    )

    def RunCommand(self):
        """Command entry point for the rm command."""
        # self.recursion_requested is initialized in command.py (so it can be
        # checked in parent class for all commands).
        self.continue_on_error = False
        self.all_versions = False
        if self.sub_opts:
            for o, unused_a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True
                    self.all_versions = True

        bucket_urls_to_delete = []
        bucket_strings_to_delete = []
        if self.recursion_requested:
            bucket_fields = ['id']
            for url_str in self.args:
                url = StorageUrlFromString(url_str)
                if url.IsBucket() or url.IsProvider():
                    for blr in self.WildcardIterator(url_str).IterBuckets(
                            bucket_fields=bucket_fields):
                        bucket_urls_to_delete.append(blr.storage_url)
                        bucket_strings_to_delete.append(url_str)

        # Used to track if any files failed to be removed.
        self.everything_removed_okay = True

        try:
            # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                self.args,
                self.recursion_requested,
                project_id=self.project_id,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations)

            # Perform remove requests in parallel (-m) mode, if requested, using
            # configured number of parallel processes and threads. Otherwise,
            # perform requests with sequential function calls in current process.
            self.Apply(_RemoveFuncWrapper,
                       name_expansion_iterator,
                       _RemoveExceptionHandler,
                       fail_on_error=(not self.continue_on_error))

        # Assuming the bucket has versioning enabled, URLs that don't map to
        # objects should throw an error even with all_versions, since the prior
        # round of deletes only sends objects to a history table.
        # This assumption that rm -a is only called for versioned buckets should be
        # corrected, but the fix is non-trivial.
        except CommandException as e:
            # Don't raise if there are buckets to delete -- it's valid to say:
            #   gsutil rm -r gs://some_bucket
            # if the bucket is empty.
            if not bucket_urls_to_delete and not self.continue_on_error:
                raise
            # Reset the failure count if we failed due to an empty bucket that we're
            # going to delete.
            msg = 'No URLs matched: '
            if msg in str(e):
                parts = str(e).split(msg)
                if len(parts) == 2 and parts[1] in bucket_strings_to_delete:
                    ResetFailureCount()
        except ServiceException as e:
            if not self.continue_on_error:
                raise

        if not self.everything_removed_okay and not self.continue_on_error:
            raise CommandException('Some files could not be removed.')

        # If this was a gsutil rm -r command covering any bucket subdirs,
        # remove any dir_$folder$ objects (which are created by various web UI
        # tools to simulate folders).
        if self.recursion_requested:
            had_previous_failures = GetFailureCount() > 0
            folder_object_wildcards = []
            for url_str in self.args:
                url = StorageUrlFromString(url_str)
                if url.IsObject():
                    folder_object_wildcards.append('%s**_$folder$' % url_str)
            if folder_object_wildcards:
                self.continue_on_error = True
                try:
                    name_expansion_iterator = NameExpansionIterator(
                        self.command_name,
                        self.debug,
                        self.logger,
                        self.gsutil_api,
                        folder_object_wildcards,
                        self.recursion_requested,
                        project_id=self.project_id,
                        all_versions=self.all_versions)
                    # When we're removing folder objects, always continue on error
                    self.Apply(_RemoveFuncWrapper,
                               name_expansion_iterator,
                               _RemoveFoldersExceptionHandler,
                               fail_on_error=False)
                except CommandException as e:
                    # Ignore exception from name expansion due to an absent folder file.
                    if not e.reason.startswith('No URLs matched:'):
                        raise
                if not had_previous_failures:
                    ResetFailureCount()

        # Now that all data has been deleted, delete any bucket URLs.
        for url in bucket_urls_to_delete:
            self.logger.info('Removing %s...', url)

            @Retry(NotEmptyException, tries=3, timeout_secs=1)
            def BucketDeleteWithRetry():
                self.gsutil_api.DeleteBucket(url.bucket_name,
                                             provider=url.scheme)

            BucketDeleteWithRetry()

        return 0
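
The except branch above splits the exception text on the 'No URLs matched: ' prefix to decide whether a failed expansion merely refers to an empty bucket that is already queued for deletion; the newer rm code in Example #25 factors the same check into _ExceptionMatchesBucketToDelete. A hedged, standalone sketch of that check, operating on a plain message string rather than a CommandException:

NO_URLS_MATCHED_PREFIX = 'No URLs matched: '


def exception_matches_bucket_to_delete(bucket_strings_to_delete, message):
    """Return True if the failure is just an empty bucket slated for removal."""
    if not bucket_strings_to_delete:
        return False
    parts = message.split(NO_URLS_MATCHED_PREFIX)
    return len(parts) == 2 and parts[1] in bucket_strings_to_delete


# Example: rm -r on an empty bucket should not count as a failure,
# but a miss on an unrelated URL should.
assert exception_matches_bucket_to_delete(
    ['gs://some_bucket'], 'No URLs matched: gs://some_bucket')
assert not exception_matches_bucket_to_delete(
    ['gs://some_bucket'], 'No URLs matched: gs://other_bucket/obj')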