def _RsyncFunc(cls, diff_to_apply, thread_state=None):
  """Worker function for performing the actual copy and remove operations."""
  gsutil_api = GetCloudApiInstance(cls, thread_state=thread_state)
  dst_url_str = diff_to_apply.dst_url_str
  dst_url = StorageUrlFromString(dst_url_str)
  if diff_to_apply.diff_action == _DiffAction.REMOVE:
    if cls.dryrun:
      cls.logger.info('Would remove %s', dst_url)
    else:
      cls.logger.info('Removing %s', dst_url)
      if dst_url.IsFileUrl():
        os.unlink(dst_url.object_name)
      else:
        try:
          gsutil_api.DeleteObject(dst_url.bucket_name,
                                  dst_url.object_name,
                                  generation=dst_url.generation,
                                  provider=dst_url.scheme)
        except NotFoundException:
          # If the object happened to be deleted by an external process, this
          # is fine because it moves us closer to the desired state.
          pass
  elif diff_to_apply.diff_action == _DiffAction.COPY:
    src_url_str = diff_to_apply.src_url_str
    src_url = StorageUrlFromString(src_url_str)
    if cls.dryrun:
      cls.logger.info('Would copy %s to %s', src_url, dst_url)
    else:
      copy_helper.PerformCopy(cls.logger, src_url, dst_url, gsutil_api, cls,
                              _RsyncExceptionHandler, headers=cls.headers)
  else:
    raise CommandException('Got unexpected DiffAction (%d)'
                           % diff_to_apply.diff_action)
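# A minimal sketch of the diff-record shape _RsyncFunc consumes. The real
# _DiffToApply and _DiffAction types live in gsutil's rsync module; the
# namedtuple and enum values below are illustrative stand-ins inferred from
# the attribute accesses above, not the actual definitions.
import collections

_DiffRecord = collections.namedtuple(
    '_DiffRecord', ['src_url_str', 'dst_url_str', 'diff_action'])


class _Action(object):
  COPY = 0
  REMOVE = 1

# A COPY needs both URLs; a REMOVE populates only the destination URL.
copy_diff = _DiffRecord('./data.txt', 'gs://my-bucket/data.txt',
                        _Action.COPY)
remove_diff = _DiffRecord(None, 'gs://my-bucket/stale.txt', _Action.REMOVE)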
def CreateWildcardIterator(url_str, gsutil_api, all_versions=False, debug=0,
                           project_id=None, ignore_symlinks=False):
  """Instantiate a WildcardIterator for the given URL string.

  Args:
    url_str: URL string naming wildcard object(s) to iterate.
    gsutil_api: Cloud storage interface. Passed in for thread safety, also
        settable for testing/mocking.
    all_versions: If true, the iterator yields all versions of objects
        matching the wildcard. If false, yields just the live object version.
    debug: Debug level to control debug output for iterator.
    project_id: Project id to use for bucket listings.
    ignore_symlinks: For FileUrls, ignore symlinks during iteration if true.

  Returns:
    A WildcardIterator that handles the requested iteration.
  """
  url = StorageUrlFromString(url_str)
  if url.IsFileUrl():
    return FileWildcardIterator(url, debug=debug,
                                ignore_symlinks=ignore_symlinks)
  else:  # Cloud URL
    return CloudWildcardIterator(url, gsutil_api,
                                 all_versions=all_versions,
                                 debug=debug,
                                 project_id=project_id)
def CreateWildcardIterator(url_str, gsutil_api, all_versions=False,
                           project_id=None, ignore_symlinks=False,
                           logger=None):
  """Instantiate a WildcardIterator for the given URL string.

  Args:
    url_str: URL string naming wildcard object(s) to iterate.
    gsutil_api: Cloud storage interface. Passed in for thread safety, also
        settable for testing/mocking.
    all_versions: If true, the iterator yields all versions of objects
        matching the wildcard. If false, yields just the live object version.
    project_id: Project id to use for bucket listings.
    ignore_symlinks: For FileUrls, ignore symlinks during iteration if true.
    logger: logging.Logger used for outputting debug messages during
        iteration. If None, the root logger will be used.

  Returns:
    A WildcardIterator that handles the requested iteration.
  """
  url = StorageUrlFromString(url_str)
  logger = logger or logging.getLogger()
  if url.IsFileUrl():
    return FileWildcardIterator(url, ignore_symlinks=ignore_symlinks,
                                logger=logger)
  else:  # Cloud URL
    return CloudWildcardIterator(url, gsutil_api,
                                 all_versions=all_versions,
                                 project_id=project_id)
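# Illustrative usage of the logger-based CreateWildcardIterator variant. The
# bucket name and the surrounding helper are assumptions for this sketch;
# IterObjects and bucket_listing_fields match the iterator usage shown in
# the hash command below.
def _LogMatchingObjects(gsutil_api, logger):
  for blr in CreateWildcardIterator('gs://my-bucket/**', gsutil_api,
                                    logger=logger).IterObjects(
                                        bucket_listing_fields=['name']):
    logger.info('Matched %s', blr.url_string)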
def test_storage_url_from_string(self):
  storage_url = StorageUrlFromString('abc')
  self.assertTrue(storage_url.IsFileUrl())
  self.assertEquals('abc', storage_url.object_name)

  storage_url = StorageUrlFromString('file://abc/123')
  self.assertTrue(storage_url.IsFileUrl())
  self.assertEquals('abc/123', storage_url.object_name)

  storage_url = StorageUrlFromString('gs://abc/123')
  self.assertTrue(storage_url.IsCloudUrl())
  self.assertEquals('abc', storage_url.bucket_name)
  self.assertEquals('123', storage_url.object_name)

  storage_url = StorageUrlFromString('s3://abc/123')
  self.assertTrue(storage_url.IsCloudUrl())
  self.assertEquals('abc', storage_url.bucket_name)
  self.assertEquals('123', storage_url.object_name)
def _ComputeNeededFileChecksums(logger, src_url_str, src_size, src_crc32c,
                                src_md5, dst_url_str, dst_size, dst_crc32c,
                                dst_md5):
  """Computes any file checksums needed by _ObjectsMatch.

  Args:
    logger: logging.Logger for outputting log messages.
    src_url_str: Source URL string.
    src_size: Source size.
    src_crc32c: Source CRC32c.
    src_md5: Source MD5.
    dst_url_str: Destination URL string.
    dst_size: Destination size.
    dst_crc32c: Destination CRC32c.
    dst_md5: Destination MD5.

  Returns:
    (src_crc32c, src_md5, dst_crc32c, dst_md5)
  """
  src_url = StorageUrlFromString(src_url_str)
  dst_url = StorageUrlFromString(dst_url_str)
  if src_url.IsFileUrl():
    if dst_crc32c != _NA or dst_url.IsFileUrl():
      if src_size > TEN_MIB:
        logger.info('Computing CRC32C for %s...', src_url_str)
      with open(src_url.object_name, 'rb') as fp:
        src_crc32c = CalculateB64EncodedCrc32cFromContents(fp)
    elif dst_md5 != _NA or dst_url.IsFileUrl():
      if src_size > TEN_MIB:
        logger.info('Computing MD5 for %s...', src_url_str)
      with open(src_url.object_name, 'rb') as fp:
        src_md5 = CalculateB64EncodedMd5FromContents(fp)
  if dst_url.IsFileUrl():
    if src_crc32c != _NA:
      if dst_size > TEN_MIB:
        logger.info('Computing CRC32C for %s...', dst_url_str)
      with open(dst_url.object_name, 'rb') as fp:
        dst_crc32c = CalculateB64EncodedCrc32cFromContents(fp)
    elif src_md5 != _NA:
      if dst_size > TEN_MIB:
        logger.info('Computing MD5 for %s...', dst_url_str)
      with open(dst_url.object_name, 'rb') as fp:
        dst_md5 = CalculateB64EncodedMd5FromContents(fp)
  return (src_crc32c, src_md5, dst_crc32c, dst_md5)
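# A minimal sketch of the two checksum helpers used above, run against one
# local file. The path and the wrapper function are illustrative; the
# helpers themselves are the same CalculateB64EncodedCrc32cFromContents /
# CalculateB64EncodedMd5FromContents called by _ComputeNeededFileChecksums.
def _ChecksumLocalFile(path):
  with open(path, 'rb') as fp:
    crc32c = CalculateB64EncodedCrc32cFromContents(fp)
    fp.seek(0)  # Rewind so the MD5 pass reads from the start of the file.
    md5 = CalculateB64EncodedMd5FromContents(fp)
  return (crc32c, md5)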
def RunCommand(self):
  """Command entry point for the hash command."""
  (calc_crc32c, calc_md5, format_func, cloud_format_func, output_format) = (
      self._ParseOpts(self.sub_opts, self.logger))

  matched_one = False
  for url_str in self.args:
    for file_ref in self.WildcardIterator(url_str).IterObjects(
        bucket_listing_fields=['crc32c', 'md5Hash', 'customerEncryption',
                               'size']):
      matched_one = True
      url = StorageUrlFromString(url_str)
      file_name = file_ref.storage_url.object_name
      if StorageUrlFromString(url_str).IsFileUrl():
        file_size = os.path.getsize(file_name)
        self.gsutil_api.status_queue.put(
            FileMessage(url, None, time.time(), size=file_size,
                        finished=False, message_type=FileMessage.FILE_HASH))
        callback_processor = ProgressCallbackWithTimeout(
            file_size,
            FileProgressCallbackHandler(self.gsutil_api.status_queue,
                                        src_url=StorageUrlFromString(url_str),
                                        operation_name='Hashing').call)
        hash_dict = self._GetHashClassesFromArgs(calc_crc32c, calc_md5)
        with open(file_name, 'rb') as fp:
          CalculateHashesFromContents(fp, hash_dict,
                                      callback_processor=callback_processor)
        self.gsutil_api.status_queue.put(
            FileMessage(url, None, time.time(), size=file_size,
                        finished=True, message_type=FileMessage.FILE_HASH))
      else:
        hash_dict = {}
        obj_metadata = file_ref.root_object
        file_size = obj_metadata.size
        md5_present = obj_metadata.md5Hash is not None
        crc32c_present = obj_metadata.crc32c is not None
        if not md5_present and not crc32c_present:
          logging.getLogger().warn('No hashes present for %s', url_str)
          continue
        if md5_present:
          hash_dict['md5'] = obj_metadata.md5Hash
        if crc32c_present:
          hash_dict['crc32c'] = obj_metadata.crc32c
      print 'Hashes [%s] for %s:' % (output_format, file_name)
      for name, digest in hash_dict.iteritems():
        print '\tHash (%s):\t\t%s' % (name,
                                      (format_func(digest)
                                       if url.IsFileUrl()
                                       else cloud_format_func(digest)))

  if not matched_one:
    raise CommandException('No files matched')
  PutToQueueWithTimeout(self.gsutil_api.status_queue,
                        FinalMessage(time.time()))
  return 0
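# Illustrative sketch of the local-file hashing path above. hash_dict maps
# algorithm names to hashlib-style digesters that CalculateHashesFromContents
# updates in place while reading the file; using hashlib.md5() directly here
# is an assumption standing in for whatever _GetHashClassesFromArgs returns.
import hashlib


def _Md5OfLocalFile(path):
  hash_dict = {'md5': hashlib.md5()}
  with open(path, 'rb') as fp:
    CalculateHashesFromContents(fp, hash_dict)
  return hash_dict['md5'].hexdigest()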
def _DiffToApplyArgChecker(command_instance, diff_to_apply):
  """Arg checker that skips symlinks if -e flag specified."""
  if (diff_to_apply.diff_action == _DiffAction.REMOVE
      or not command_instance.exclude_symlinks):
    # No src URL is populated for REMOVE actions.
    return True
  exp_src_url = StorageUrlFromString(diff_to_apply.src_url_str)
  if exp_src_url.IsFileUrl() and os.path.islink(exp_src_url.object_name):
    command_instance.logger.info('Skipping symbolic link %s...', exp_src_url)
    return False
  return True
def HaveFileUrls(args_to_check):
  """Checks whether args_to_check contain any file URLs.

  Args:
    args_to_check: Command-line argument subset to check.

  Returns:
    True if args_to_check contains any file URLs.
  """
  for url_str in args_to_check:
    storage_url = StorageUrlFromString(url_str)
    if storage_url.IsFileUrl():
      return True
  return False
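# Example: any local path argument parses as a file URL, so a mixed list
# returns True while an all-cloud list returns False.
assert HaveFileUrls(['gs://bucket/obj', './local-file.txt'])
assert not HaveFileUrls(['gs://bucket/obj1', 's3://bucket/obj2'])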
def RunCommand(self):
  """Command entry point for the ls command."""
  got_nomatch_errors = False
  got_bucket_nomatch_errors = False
  listing_style = ListingStyle.SHORT
  get_bucket_info = False
  self.recursion_requested = False
  self.all_versions = False
  self.include_etag = False
  self.human_readable = False
  if self.sub_opts:
    for o, a in self.sub_opts:
      if o == '-a':
        self.all_versions = True
      elif o == '-e':
        self.include_etag = True
      elif o == '-b':
        get_bucket_info = True
      elif o == '-h':
        self.human_readable = True
      elif o == '-l':
        listing_style = ListingStyle.LONG
      elif o == '-L':
        listing_style = ListingStyle.LONG_LONG
      elif o == '-p':
        self.project_id = a
      elif o == '-r' or o == '-R':
        self.recursion_requested = True

  if not self.args:
    # default to listing all gs buckets
    self.args = ['gs://']

  total_objs = 0
  total_bytes = 0

  def MaybePrintBucketHeader(blr):
    if len(self.args) > 1:
      print '%s:' % blr.url_string.encode(UTF8)
  print_bucket_header = MaybePrintBucketHeader

  for url_str in self.args:
    storage_url = StorageUrlFromString(url_str)
    if storage_url.IsFileUrl():
      raise CommandException('Only cloud URLs are supported for %s'
                             % self.command_name)
    bucket_fields = None
    if (listing_style == ListingStyle.SHORT or
        listing_style == ListingStyle.LONG):
      bucket_fields = ['id']
    elif listing_style == ListingStyle.LONG_LONG:
      bucket_fields = ['location', 'storageClass', 'versioning', 'acl',
                       'defaultObjectAcl', 'website', 'logging', 'cors',
                       'lifecycle']
    if storage_url.IsProvider():
      # Provider URL: use bucket wildcard to list buckets.
      for blr in self.WildcardIterator(
          '%s://*' % storage_url.scheme).IterBuckets(
              bucket_fields=bucket_fields):
        self._PrintBucketInfo(blr, listing_style)
    elif storage_url.IsBucket() and get_bucket_info:
      # ls -b bucket listing request: List info about bucket(s).
      total_buckets = 0
      for blr in self.WildcardIterator(url_str).IterBuckets(
          bucket_fields=bucket_fields):
        if not ContainsWildcard(url_str) and not blr.root_object:
          # Iterator does not make an HTTP call for non-wildcarded
          # listings with fields=='id'. Ensure the bucket exists by calling
          # GetBucket.
          self.gsutil_api.GetBucket(blr.storage_url.bucket_name,
                                    fields=['id'],
                                    provider=storage_url.scheme)
        self._PrintBucketInfo(blr, listing_style)
        total_buckets += 1
      if not ContainsWildcard(url_str) and not total_buckets:
        got_bucket_nomatch_errors = True
    else:
      # URL names a bucket, object, or object subdir ->
      # list matching object(s) / subdirs.
      def _PrintPrefixLong(blr):
        print '%-33s%s' % ('', blr.url_string.encode(UTF8))

      if listing_style == ListingStyle.SHORT:
        # ls helper by default readies us for a short listing.
        ls_helper = LsHelper(self.WildcardIterator, self.logger,
                             all_versions=self.all_versions,
                             print_bucket_header_func=print_bucket_header,
                             should_recurse=self.recursion_requested)
      elif listing_style == ListingStyle.LONG:
        bucket_listing_fields = ['name', 'updated', 'size']
        if self.all_versions:
          bucket_listing_fields.extend(['generation', 'metageneration'])
        if self.include_etag:
          bucket_listing_fields.append('etag')

        ls_helper = LsHelper(self.WildcardIterator, self.logger,
                             print_object_func=self._PrintLongListing,
                             print_dir_func=_PrintPrefixLong,
                             print_bucket_header_func=print_bucket_header,
                             all_versions=self.all_versions,
                             should_recurse=self.recursion_requested,
                             fields=bucket_listing_fields)
      elif listing_style == ListingStyle.LONG_LONG:
        # List all fields
        bucket_listing_fields = None
        ls_helper = LsHelper(self.WildcardIterator, self.logger,
                             print_object_func=PrintFullInfoAboutObject,
                             print_dir_func=_PrintPrefixLong,
                             print_bucket_header_func=print_bucket_header,
                             all_versions=self.all_versions,
                             should_recurse=self.recursion_requested,
                             fields=bucket_listing_fields)
      else:
        raise CommandException('Unknown listing style: %s' % listing_style)

      exp_dirs, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(storage_url)
      if storage_url.IsObject() and exp_objs == 0 and exp_dirs == 0:
        got_nomatch_errors = True
      total_bytes += exp_bytes
      total_objs += exp_objs

  if total_objs and listing_style != ListingStyle.SHORT:
    print ('TOTAL: %d objects, %d bytes (%s)' %
           (total_objs, total_bytes, MakeHumanReadable(float(total_bytes))))
  if got_nomatch_errors:
    raise CommandException('One or more URLs matched no objects.')
  if got_bucket_nomatch_errors:
    raise NotFoundException('One or more bucket URLs matched no buckets.')

  return 0
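# A minimal sketch of reusing the LsHelper pattern from RunCommand outside
# the command class. The constructor arguments and the three-element return
# of ExpandUrlAndPrint mirror the usage above; _ExpandAndCount itself is a
# hypothetical helper, and a real caller would pass a command instance that
# provides WildcardIterator and logger. Note ExpandUrlAndPrint prints
# listings as a side effect of counting.
def _ExpandAndCount(command, url_str):
  helper = LsHelper(command.WildcardIterator, command.logger,
                    should_recurse=True)
  _, num_objs, num_bytes = helper.ExpandUrlAndPrint(
      StorageUrlFromString(url_str))
  return (num_objs, num_bytes)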
def RunCommand(self):
  """Command entry point for the du command."""
  self.line_ending = '\n'
  self.all_versions = False
  self.produce_total = False
  self.human_readable = False
  self.summary_only = False
  self.exclude_patterns = []
  if self.sub_opts:
    for o, a in self.sub_opts:
      if o == '-0':
        self.line_ending = '\0'
      elif o == '-a':
        self.all_versions = True
      elif o == '-c':
        self.produce_total = True
      elif o == '-e':
        self.exclude_patterns.append(a)
      elif o == '-h':
        self.human_readable = True
      elif o == '-s':
        self.summary_only = True
      elif o == '-X':
        if a == '-':
          f = sys.stdin
        else:
          f = open(a, 'r')
        try:
          for line in f:
            line = line.strip()
            if line:
              self.exclude_patterns.append(line)
        finally:
          f.close()

  if not self.args:
    # Default to listing all gs buckets.
    self.args = ['gs://']

  total_bytes = 0
  got_nomatch_errors = False

  def _PrintObjectLong(blr):
    return self._PrintInfoAboutBucketListingRef(blr)

  def _PrintNothing(unused_blr=None):
    pass

  def _PrintDirectory(num_bytes, name):
    if not self.summary_only:
      self._PrintSummaryLine(num_bytes, name)

  for url_arg in self.args:
    top_level_storage_url = StorageUrlFromString(url_arg)
    if top_level_storage_url.IsFileUrl():
      raise CommandException('Only cloud URLs are supported for %s'
                             % self.command_name)
    bucket_listing_fields = ['size']

    ls_helper = LsHelper(self.WildcardIterator, self.logger,
                         print_object_func=_PrintObjectLong,
                         print_dir_func=_PrintNothing,
                         print_dir_header_func=_PrintNothing,
                         print_dir_summary_func=_PrintDirectory,
                         print_newline_func=_PrintNothing,
                         all_versions=self.all_versions,
                         should_recurse=True,
                         exclude_patterns=self.exclude_patterns,
                         fields=bucket_listing_fields)

    # ls_helper expands to objects and prefixes, so perform a top-level
    # expansion first.
    if top_level_storage_url.IsProvider():
      # Provider URL: use bucket wildcard to iterate over all buckets.
      top_level_iter = self.WildcardIterator(
          '%s://*' % top_level_storage_url.scheme).IterBuckets(
              bucket_fields=['id'])
    elif top_level_storage_url.IsBucket():
      top_level_iter = self.WildcardIterator(
          '%s://%s' % (top_level_storage_url.scheme,
                       top_level_storage_url.bucket_name)).IterBuckets(
                           bucket_fields=['id'])
    else:
      top_level_iter = [BucketListingObject(top_level_storage_url)]

    for blr in top_level_iter:
      storage_url = blr.storage_url
      if storage_url.IsBucket() and self.summary_only:
        storage_url = StorageUrlFromString(
            storage_url.CreatePrefixUrl(wildcard_suffix='**'))
      _, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(storage_url)
      if (storage_url.IsObject() and exp_objs == 0 and
          ContainsWildcard(url_arg) and not self.exclude_patterns):
        got_nomatch_errors = True
      total_bytes += exp_bytes

      if self.summary_only:
        self._PrintSummaryLine(exp_bytes, blr.url_string.rstrip('/'))

  if self.produce_total:
    self._PrintSummaryLine(total_bytes, 'total')

  if got_nomatch_errors:
    raise CommandException('One or more URLs matched no objects.')

  return 0
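# Sketch of the -X pattern-file handling in isolation: '-' selects stdin,
# blank lines are dropped, and each remaining stripped line becomes an
# exclude pattern. The function name is hypothetical; unlike the inline loop
# above, this version deliberately avoids closing sys.stdin.
def _ReadExcludePatterns(path_or_dash):
  f = sys.stdin if path_or_dash == '-' else open(path_or_dash, 'r')
  try:
    return [line.strip() for line in f if line.strip()]
  finally:
    if f is not sys.stdin:
      f.close()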