def _SetDefStorageClass(self):
  """Sets the default storage class on every bucket matched by the URL args."""
  # "set" has already been popped off the front of self.args by the caller.
  target_class = NormalizeStorageClass(self.args[0])
  bucket_url_strs = self.args[1:]
  if not bucket_url_strs:
    self.RaiseWrongNumberOfArgumentsException()

  matched_any = False
  for bucket_url_str in bucket_url_strs:
    # Throws a CommandException if the argument is not a gs:// URL.
    self._CheckIsGsUrl(bucket_url_str)
    for bucket_listing_ref in self.GetBucketUrlIterFromArg(
        bucket_url_str, bucket_fields=['id']):
      matched_any = True
      self.logger.info(
          'Setting default storage class to "%s" for bucket %s' %
          (target_class, bucket_listing_ref.url_string.rstrip('/')))
      # Patch with a minimal Bucket message carrying only the new class.
      patch_metadata = apitools_messages.Bucket()
      patch_metadata.storageClass = target_class
      self.gsutil_api.PatchBucket(
          bucket_listing_ref.storage_url.bucket_name,
          patch_metadata,
          provider=bucket_listing_ref.storage_url.scheme,
          fields=['id'])

  if not matched_any:
    raise CommandException(NO_URLS_MATCHED_TARGET % list(bucket_url_strs))
def RunCommand(self):
  """Command entry point for the mb command."""
  location = None
  storage_class = None
  if self.sub_opts:
    for flag, value in self.sub_opts:
      if flag == '-l':
        location = value
      elif flag == '-p':
        # Project IDs are sent as header values when using gs and s3 XML
        # APIs, so they must contain only ASCII characters.
        InsistAscii(value, 'Invalid non-ASCII character found in project ID')
        self.project_id = value
      elif flag in ('-c', '-s'):
        storage_class = NormalizeStorageClass(value)

  bucket_metadata = apitools_messages.Bucket(location=location,
                                             storageClass=storage_class)

  for url_str in self.args:
    bucket_url = StorageUrlFromString(url_str)
    if not bucket_url.IsBucket():
      raise CommandException('The mb command requires a URL that specifies a '
                             'bucket.\n"%s" is not valid.' % bucket_url)
    bucket_name = bucket_url.bucket_name
    if (not BUCKET_NAME_RE.match(bucket_name) or
        TOO_LONG_DNS_NAME_COMP.search(bucket_name)):
      raise InvalidUrlError('Invalid bucket name in URL "%s"' % bucket_name)
    self.logger.info('Creating %s...', bucket_url)
    # Pass storage_class param only if this is a GCS bucket. (In S3 the
    # storage class is specified on the key object.)
    try:
      self.gsutil_api.CreateBucket(bucket_name,
                                   project_id=self.project_id,
                                   metadata=bucket_metadata,
                                   provider=bucket_url.scheme)
    except BadRequestException as e:
      is_dotful_tld_error = (e.status == 400 and
                             e.reason == 'DotfulBucketNameNotUnderTld' and
                             bucket_url.scheme == 'gs')
      if not is_dotful_tld_error:
        raise
      final_comp = bucket_name[bucket_name.rfind('.') + 1:]
      raise CommandException('\n'.join(
          textwrap.wrap(
              'Buckets with "." in the name must be valid DNS names. The bucket'
              ' you are attempting to create (%s) is not a valid DNS name,'
              ' because the final component (%s) is not currently a valid part'
              ' of the top-level DNS tree.' % (bucket_name, final_comp))))
  return 0
def RunCommand(self):
  """Command entry point for the defstorageclass command.

  Dispatches to the "get" or "set" handler based on the first positional
  argument, logging the chosen subcommand (and, for "set", the normalized
  storage class) to metrics before invoking the handler.

  Returns:
    0 on success.

  Raises:
    CommandException: If an unrecognized subcommand was supplied.
  """
  action_subcommand = self.args.pop(0)
  subcommand_args = [action_subcommand]
  if action_subcommand == 'get':
    func = self._GetDefStorageClass
  elif action_subcommand == 'set':
    func = self._SetDefStorageClass
    # Fail with the standard usage error instead of an IndexError when no
    # storage class argument follows "set".
    if not self.args:
      self.RaiseWrongNumberOfArgumentsException()
    # Record the normalized class name so metrics are consistent regardless
    # of the alias/capitalization the user typed.
    normalized_storage_class = NormalizeStorageClass(self.args[0])
    subcommand_args.append(normalized_storage_class)
  else:
    raise CommandException(('Invalid subcommand "%s" for the %s command.\n'
                            'See "gsutil help %s".') %
                           (action_subcommand, self.command_name,
                            self.command_name))
  metrics.LogCommandParams(subcommands=subcommand_args)
  func()
  return 0
def RunCommand(self):
  """Command entry point for the mb command."""
  bucket_policy_only = None
  location = None
  storage_class = None
  seconds = None

  for flag, value in (self.sub_opts or []):
    if flag == '-l':
      location = value
    elif flag == '-p':
      # Project IDs are sent as header values when using gs and s3 XML
      # APIs, so they must contain only ASCII characters.
      InsistAscii(value, 'Invalid non-ASCII character found in project ID')
      self.project_id = value
    elif flag in ('-c', '-s'):
      storage_class = NormalizeStorageClass(value)
    elif flag == '--retention':
      seconds = RetentionInSeconds(value)
    elif flag == '-b':
      if self.gsutil_api.GetApiSelector('gs') != ApiSelector.JSON:
        raise CommandException('The -b <on|off> option '
                               'can only be used with the JSON API')
      InsistOnOrOff(value, 'Only on and off values allowed for -b option')
      bucket_policy_only = (value == 'on')

  bucket_metadata = apitools_messages.Bucket(location=location,
                                             storageClass=storage_class)
  if bucket_policy_only:
    iam_config = IamConfigurationValue()
    iam_config.bucketPolicyOnly = BucketPolicyOnlyValue()
    iam_config.bucketPolicyOnly.enabled = bucket_policy_only
    bucket_metadata.iamConfiguration = iam_config

  for url_str in self.args:
    bucket_url = StorageUrlFromString(url_str)
    if seconds is not None:
      if bucket_url.scheme != 'gs':
        raise CommandException('Retention policy can only be specified for '
                               'GCS buckets.')
      bucket_metadata.retentionPolicy = (
          apitools_messages.Bucket.RetentionPolicyValue(
              retentionPeriod=seconds))
    if not bucket_url.IsBucket():
      raise CommandException('The mb command requires a URL that specifies a '
                             'bucket.\n"%s" is not valid.' % bucket_url)
    bucket_name = bucket_url.bucket_name
    if (not BUCKET_NAME_RE.match(bucket_name) or
        TOO_LONG_DNS_NAME_COMP.search(bucket_name)):
      raise InvalidUrlError('Invalid bucket name in URL "%s"' % bucket_name)
    self.logger.info('Creating %s...', bucket_url)
    # Pass storage_class param only if this is a GCS bucket. (In S3 the
    # storage class is specified on the key object.)
    try:
      self.gsutil_api.CreateBucket(bucket_name,
                                   project_id=self.project_id,
                                   metadata=bucket_metadata,
                                   provider=bucket_url.scheme)
    except BadRequestException as e:
      if (e.status != 400 or e.reason != 'DotfulBucketNameNotUnderTld' or
          bucket_url.scheme != 'gs'):
        raise
      final_comp = bucket_name[bucket_name.rfind('.') + 1:]
      raise CommandException('\n'.join(
          textwrap.wrap(
              'Buckets with "." in the name must be valid DNS names. The bucket'
              ' you are attempting to create (%s) is not a valid DNS name,'
              ' because the final component (%s) is not currently a valid part'
              ' of the top-level DNS tree.' % (bucket_name, final_comp))))
  return 0
def RewriteFunc(self, name_expansion_result, thread_state=None):
  """Rewrites a single object, applying the configured transformations.

  Fetches the source object's metadata, decides whether the requested
  transformations (storage class and/or encryption key) are all redundant
  (in which case the rewrite is skipped), then issues a CopyObject rewrite
  with the adjusted destination metadata.

  Args:
    name_expansion_result: Expansion result whose expanded_storage_url
        identifies the object to rewrite.
    thread_state: Opaque per-thread state passed to GetCloudApiInstance to
        obtain a thread-local cloud API client.

  Raises:
    CommandException: If the caller lacks OWNER permission on the object
        (and -O was not specified).
    EncryptionException: If -k was not passed but the boto encryption_key
        does not match the object's key, or if no configured decryption key
        matches the object's CSEK hash.
  """
  gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)
  transform_url = name_expansion_result.expanded_storage_url

  self.CheckProvider(transform_url)

  # Get all fields so that we can ensure that the target metadata is
  # specified correctly.
  src_metadata = gsutil_api.GetObjectMetadata(
      transform_url.bucket_name,
      transform_url.object_name,
      generation=transform_url.generation,
      provider=transform_url.scheme)

  if self.no_preserve_acl:
    # Leave ACL unchanged.
    src_metadata.acl = []
  elif not src_metadata.acl:
    raise CommandException(
        'No OWNER permission found for object %s. OWNER permission is '
        'required for rewriting objects, (otherwise their ACLs would be '
        'reset).' % transform_url)

  # Note: If other transform types are added, they must ensure that the
  # encryption key configuration matches the boto configuration, because
  # gsutil maintains an invariant that all objects it writes use the
  # encryption_key value (including decrypting if no key is present).

  # Store metadata about src encryption to make logic below easier to read.
  src_encryption_kms_key = (src_metadata.kmsKeyName
                            if src_metadata.kmsKeyName else None)

  src_encryption_sha256 = None
  if (src_metadata.customerEncryption and
      src_metadata.customerEncryption.keySha256):
    src_encryption_sha256 = src_metadata.customerEncryption.keySha256
    # In python3, hashes are bytes, use ascii since it should be ascii
    src_encryption_sha256 = src_encryption_sha256.encode('ascii')

  src_was_encrypted = (src_encryption_sha256 is not None or
                       src_encryption_kms_key is not None)

  # Also store metadata about dest encryption.
  dest_encryption_kms_key = None
  if (self.boto_file_encryption_keywrapper is not None and
      self.boto_file_encryption_keywrapper.crypto_type == CryptoKeyType.CMEK):
    dest_encryption_kms_key = self.boto_file_encryption_keywrapper.crypto_key

  dest_encryption_sha256 = None
  if (self.boto_file_encryption_keywrapper is not None and
      self.boto_file_encryption_keywrapper.crypto_type == CryptoKeyType.CSEK):
    dest_encryption_sha256 = (
        self.boto_file_encryption_keywrapper.crypto_key_sha256)

  should_encrypt_dest = self.boto_file_encryption_keywrapper is not None

  # "Unchanged" means both the CSEK hash and the KMS key name agree between
  # source and destination (either may be None on both sides).
  encryption_unchanged = (src_encryption_sha256 == dest_encryption_sha256 and
                          src_encryption_kms_key == dest_encryption_kms_key)

  # Prevent accidental key rotation.
  if (_TransformTypes.CRYPTO_KEY not in self.transform_types and
      not encryption_unchanged):
    raise EncryptionException(
        'The "-k" flag was not passed to the rewrite command, but the '
        'encryption_key value in your boto config file did not match the key '
        'used to encrypt the object "%s" (hash: %s). To encrypt the object '
        'using a different key, you must specify the "-k" flag.' %
        (transform_url, src_encryption_sha256))

  # Determine if we can skip this rewrite operation (this should only be done
  # when ALL of the specified transformations are redundant).
  redundant_transforms = []

  # STORAGE_CLASS transform is redundant if the target storage class matches
  # the existing storage class.
  if (_TransformTypes.STORAGE_CLASS in self.transform_types and
      self.dest_storage_class == NormalizeStorageClass(
          src_metadata.storageClass)):
    redundant_transforms.append('storage class')

  # CRYPTO_KEY transform is redundant if we're using the same encryption
  # key that was used to encrypt the source. However, if no encryption key was
  # specified, we should still perform the rewrite. This results in the
  # rewritten object either being encrypted with its bucket's default KMS key
  # or having no CSEK/CMEK encryption applied. While we could attempt fetching
  # the bucket's metadata and checking its default KMS key before performing
  # the rewrite (in the case where we appear to be transitioning from
  # no key to no key), that is vulnerable to the race condition where the
  # default KMS key is changed between when we check it and when we rewrite
  # the object.
  if (_TransformTypes.CRYPTO_KEY in self.transform_types and
      should_encrypt_dest and encryption_unchanged):
    redundant_transforms.append('encryption key')

  if len(redundant_transforms) == len(self.transform_types):
    self.logger.info('Skipping %s, all transformations were redundant: %s' %
                     (transform_url, redundant_transforms))
    return

  # First make a deep copy of the source metadata, then overwrite any
  # requested attributes (e.g. if a storage class change was specified).
  dest_metadata = encoding.PyValueToMessage(
      apitools_messages.Object, encoding.MessageToPyValue(src_metadata))

  # Remove some unnecessary/invalid fields.
  dest_metadata.generation = None
  # Service has problems if we supply an ID, but it is responsible for
  # generating one, so it is not necessary to include it here.
  dest_metadata.id = None
  # Ensure we don't copy over the KMS key name or CSEK key info from the
  # source object; those should only come from the boto config's
  # encryption_key value.
  dest_metadata.customerEncryption = None
  dest_metadata.kmsKeyName = None

  # Both a storage class change and CMEK encryption should be set as part of
  # the dest object's metadata. CSEK encryption, if specified, is added to the
  # request later via headers obtained from the keywrapper value passed to
  # encryption_tuple.
  if _TransformTypes.STORAGE_CLASS in self.transform_types:
    dest_metadata.storageClass = self.dest_storage_class
  if dest_encryption_kms_key is not None:
    dest_metadata.kmsKeyName = dest_encryption_kms_key

  # Make sure we have the CSEK key necessary to decrypt.
  decryption_keywrapper = None
  if src_encryption_sha256 is not None:
    if src_encryption_sha256 in self.csek_hash_to_keywrapper:
      decryption_keywrapper = (
          self.csek_hash_to_keywrapper[src_encryption_sha256])
    else:
      raise EncryptionException(
          'Missing decryption key with SHA256 hash %s. No decryption key '
          'matches object %s' % (src_encryption_sha256, transform_url))

  # Pick a human-readable verb for progress/announce messages based on how
  # the encryption state is changing.
  operation_name = 'Rewriting'
  if _TransformTypes.CRYPTO_KEY in self.transform_types:
    if src_was_encrypted and should_encrypt_dest:
      if not encryption_unchanged:
        operation_name = 'Rotating'
      # Else, keep "Rewriting". This might occur when -k was specified and was
      # redundant, but we're performing the operation anyway because some
      # other transformation was not redundant.
    elif src_was_encrypted and not should_encrypt_dest:
      operation_name = 'Decrypting'
    elif not src_was_encrypted and should_encrypt_dest:
      operation_name = 'Encrypting'

  # TODO: Remove this call (used to verify tests) and make it processed by
  # the UIThread.
  sys.stderr.write(
      _ConstructAnnounceText(operation_name, transform_url.url_string))
  sys.stderr.flush()

  # Message indicating beginning of operation.
  gsutil_api.status_queue.put(
      FileMessage(transform_url,
                  None,
                  time.time(),
                  finished=False,
                  size=src_metadata.size,
                  message_type=FileMessage.FILE_REWRITE))

  progress_callback = FileProgressCallbackHandler(
      gsutil_api.status_queue,
      src_url=transform_url,
      operation_name=operation_name).call

  gsutil_api.CopyObject(src_metadata,
                        dest_metadata,
                        src_generation=transform_url.generation,
                        preconditions=self.preconditions,
                        progress_callback=progress_callback,
                        decryption_tuple=decryption_keywrapper,
                        encryption_tuple=self.boto_file_encryption_keywrapper,
                        provider=transform_url.scheme,
                        fields=[])

  # Message indicating end of operation.
  gsutil_api.status_queue.put(
      FileMessage(transform_url,
                  None,
                  time.time(),
                  finished=True,
                  size=src_metadata.size,
                  message_type=FileMessage.FILE_REWRITE))
def RunCommand(self):
  """Command entry point for the rewrite command.

  Parses the transformation flags, expands the source URL arguments
  (optionally read from stdin with -I), caches the SHA256 hashes of all
  configured CSEK decryption keys, and then applies the rewrite function to
  each matched object (in parallel when -m is in effect).

  Returns:
    0 on success.

  Raises:
    CommandException: If arguments are supplied together with -I, if no URLs
        are given without -I, if no transformation flag was specified, or if
        any object could not be rewritten.
  """
  self.continue_on_error = self.parallel_operations
  self.csek_hash_to_keywrapper = {}
  self.dest_storage_class = None
  self.no_preserve_acl = False
  self.read_args_from_stdin = False
  self.supported_transformation_flags = ['-k', '-s']
  self.transform_types = set()

  self.op_failure_count = 0
  self.boto_file_encryption_keywrapper = GetEncryptionKeyWrapper(config)
  self.boto_file_encryption_sha256 = (
      self.boto_file_encryption_keywrapper.crypto_key_sha256
      if self.boto_file_encryption_keywrapper else None)

  if self.sub_opts:
    for o, a in self.sub_opts:
      if o == '-f':
        self.continue_on_error = True
      elif o == '-k':
        self.transform_types.add(_TransformTypes.CRYPTO_KEY)
      elif o == '-I':
        self.read_args_from_stdin = True
      elif o == '-O':
        self.no_preserve_acl = True
      elif o == '-r' or o == '-R':
        self.recursion_requested = True
        self.all_versions = True
      elif o == '-s':
        self.transform_types.add(_TransformTypes.STORAGE_CLASS)
        self.dest_storage_class = NormalizeStorageClass(a)

  if self.read_args_from_stdin:
    if self.args:
      raise CommandException('No arguments allowed with the -I flag.')
    url_strs = StdinIterator()
  else:
    if not self.args:
      raise CommandException('The rewrite command (without -I) expects at '
                             'least one URL.')
    url_strs = self.args

  if not self.transform_types:
    raise CommandException(
        'rewrite command requires at least one transformation flag. '
        'Currently supported transformation flags: %s' %
        self.supported_transformation_flags)

  self.preconditions = PreconditionsFromHeaders(self.headers or {})

  url_strs_generator = GenerationCheckGenerator(url_strs)

  # Convert recursive flag to flat wildcard to avoid performing multiple
  # listings.
  if self.recursion_requested:
    url_strs_generator = ConvertRecursiveToFlatWildcard(url_strs_generator)

  # Expand the source argument(s).
  name_expansion_iterator = NameExpansionIterator(
      self.command_name,
      self.debug,
      self.logger,
      self.gsutil_api,
      url_strs_generator,
      self.recursion_requested,
      project_id=self.project_id,
      continue_on_error=self.continue_on_error or self.parallel_operations,
      bucket_listing_fields=['name', 'size'])

  seek_ahead_iterator = None
  # Cannot seek ahead with stdin args, since we can only iterate them
  # once without buffering in memory.
  if not self.read_args_from_stdin:
    # Perform the same recursive-to-flat conversion on original url_strs so
    # that it is as true to the original iterator as possible.
    seek_ahead_url_strs = ConvertRecursiveToFlatWildcard(url_strs)
    seek_ahead_iterator = SeekAheadNameExpansionIterator(
        self.command_name,
        self.debug,
        self.GetSeekAheadGsutilApi(),
        seek_ahead_url_strs,
        self.recursion_requested,
        all_versions=self.all_versions,
        project_id=self.project_id)

  # Rather than have each worker repeatedly calculate the sha256 hash for each
  # decryption_key in the boto config, do this once now and cache the results.
  for i in range(0, MAX_DECRYPTION_KEYS):
    key_number = i + 1
    keywrapper = CryptoKeyWrapperFromKey(
        config.get('GSUtil', 'decryption_key%s' % str(key_number), None))
    if keywrapper is None:
      # Stop at first attribute absence in lexicographical iteration.
      break
    if keywrapper.crypto_type == CryptoKeyType.CSEK:
      self.csek_hash_to_keywrapper[keywrapper.crypto_key_sha256] = keywrapper
  # Also include the encryption_key, since it should be used to decrypt and
  # then encrypt if the object's CSEK should remain the same.
  if self.boto_file_encryption_sha256 is not None:
    self.csek_hash_to_keywrapper[self.boto_file_encryption_sha256] = (
        self.boto_file_encryption_keywrapper)

  if self.boto_file_encryption_keywrapper is None:
    msg = '\n'.join(
        textwrap.wrap(
            'NOTE: No encryption_key was specified in the boto configuration '
            'file, so gsutil will not provide an encryption key in its rewrite '
            'API requests. This will decrypt the objects unless they are in '
            'buckets with a default KMS key set, in which case the service '
            'will automatically encrypt the rewritten objects with that key.'))
    print('%s\n' % msg, file=sys.stderr)

  # Perform rewrite requests in parallel (-m) mode, if requested.
  self.Apply(_RewriteFuncWrapper,
             name_expansion_iterator,
             _RewriteExceptionHandler,
             fail_on_error=(not self.continue_on_error),
             shared_attrs=['op_failure_count'],
             seek_ahead_iterator=seek_ahead_iterator)

  if self.op_failure_count:
    # Only pluralize for more than one failure (the old check was always
    # truthy inside this branch, printing "1 files/objects").
    plural_str = 's' if self.op_failure_count > 1 else ''
    raise CommandException('%d file%s/object%s could not be rewritten.' %
                           (self.op_failure_count, plural_str, plural_str))

  return 0
def RunCommand(self):
  """Command entry point for the mb command.

  Parses the flag options (location, storage class, retention, RPO,
  uniform bucket-level access, public access prevention, autoclass, and
  default KMS key), builds a Bucket metadata message, validates each bucket
  URL argument, and issues a CreateBucket call per URL.

  Returns:
    0 on success.

  Raises:
    CommandException: For invalid flag values, non-bucket URLs, JSON-only
        flags used with a non-JSON API, retention on non-GCS buckets, KMS
        permission failures, or dotful bucket names not under a valid TLD.
    InvalidUrlError: If a bucket name fails the name/DNS-component checks.
  """
  autoclass = False
  bucket_policy_only = None
  kms_key = None
  location = None
  storage_class = None
  seconds = None
  public_access_prevention = None
  rpo = None
  # Flags in this list are only supported by the GCS JSON API; their use is
  # validated against the selected API for each bucket URL below.
  json_only_flags_in_command = []
  if self.sub_opts:
    for o, a in self.sub_opts:
      if o == '--autoclass':
        autoclass = True
        json_only_flags_in_command.append(o)
      elif o == '-k':
        kms_key = a
        ValidateCMEK(kms_key)
        json_only_flags_in_command.append(o)
      elif o == '-l':
        location = a
      elif o == '-p':
        # Project IDs are sent as header values when using gs and s3 XML APIs.
        InsistAscii(a, 'Invalid non-ASCII character found in project ID')
        self.project_id = a
      elif o == '-c' or o == '-s':
        storage_class = NormalizeStorageClass(a)
      elif o == '--retention':
        seconds = RetentionInSeconds(a)
      elif o == '--rpo':
        rpo = a.strip()
        if rpo not in VALID_RPO_VALUES:
          raise CommandException(
              'Invalid value for --rpo. Must be one of: {},'
              ' provided: {}'.format(VALID_RPO_VALUES_STRING, a))
        json_only_flags_in_command.append(o)
      elif o == '-b':
        InsistOnOrOff(a, 'Only on and off values allowed for -b option')
        bucket_policy_only = (a == 'on')
        json_only_flags_in_command.append(o)
      elif o == '--pap':
        public_access_prevention = a
        json_only_flags_in_command.append(o)

  bucket_metadata = apitools_messages.Bucket(location=location,
                                             rpo=rpo,
                                             storageClass=storage_class)
  if autoclass:
    bucket_metadata.autoclass = apitools_messages.Bucket.AutoclassValue(
        enabled=autoclass)
  if bucket_policy_only or public_access_prevention:
    bucket_metadata.iamConfiguration = IamConfigurationValue()
    iam_config = bucket_metadata.iamConfiguration
    if bucket_policy_only:
      iam_config.bucketPolicyOnly = BucketPolicyOnlyValue()
      iam_config.bucketPolicyOnly.enabled = bucket_policy_only
    if public_access_prevention:
      iam_config.publicAccessPrevention = public_access_prevention

  if kms_key:
    encryption = apitools_messages.Bucket.EncryptionValue()
    encryption.defaultKmsKeyName = kms_key
    bucket_metadata.encryption = encryption

  for bucket_url_str in self.args:
    bucket_url = StorageUrlFromString(bucket_url_str)
    if seconds is not None:
      if bucket_url.scheme != 'gs':
        raise CommandException('Retention policy can only be specified for '
                               'GCS buckets.')
      retention_policy = (apitools_messages.Bucket.RetentionPolicyValue(
          retentionPeriod=seconds))
      bucket_metadata.retentionPolicy = retention_policy

    if json_only_flags_in_command and self.gsutil_api.GetApiSelector(
        bucket_url.scheme) != ApiSelector.JSON:
      raise CommandException('The {} option(s) can only be used for GCS'
                             ' Buckets with the JSON API'.format(
                                 ', '.join(json_only_flags_in_command)))

    if not bucket_url.IsBucket():
      raise CommandException('The mb command requires a URL that specifies a '
                             'bucket.\n"%s" is not valid.' % bucket_url)
    if (not BUCKET_NAME_RE.match(bucket_url.bucket_name) or
        TOO_LONG_DNS_NAME_COMP.search(bucket_url.bucket_name)):
      raise InvalidUrlError('Invalid bucket name in URL "%s"' %
                            bucket_url.bucket_name)

    self.logger.info('Creating %s...', bucket_url)
    # Pass storage_class param only if this is a GCS bucket. (In S3 the
    # storage class is specified on the key object.)
    try:
      self.gsutil_api.CreateBucket(bucket_url.bucket_name,
                                   project_id=self.project_id,
                                   metadata=bucket_metadata,
                                   provider=bucket_url.scheme)
    except AccessDeniedException as e:
      message = e.reason
      if 'key' in message:
        # This will print the error reason and append the following as a
        # suggested next step:
        #
        # To authorize, run:
        #   gsutil kms authorize \
        #     -k <kms_key> \
        #     -p <project_id>
        message += ' To authorize, run:\n gsutil kms authorize'
        message += ' \\\n -k %s' % kms_key
        if (self.project_id):
          message += ' \\\n -p %s' % self.project_id
        raise CommandException(message)
      else:
        raise
    except BadRequestException as e:
      if (e.status == 400 and e.reason == 'DotfulBucketNameNotUnderTld' and
          bucket_url.scheme == 'gs'):
        bucket_name = bucket_url.bucket_name
        final_comp = bucket_name[bucket_name.rfind('.') + 1:]
        raise CommandException('\n'.join(
            textwrap.wrap(
                'Buckets with "." in the name must be valid DNS names. The bucket'
                ' you are attempting to create (%s) is not a valid DNS name,'
                ' because the final component (%s) is not currently a valid part'
                ' of the top-level DNS tree.' % (bucket_name, final_comp))))
      else:
        raise

  return 0