def __init__(self, command_name, proj_id_handler, headers, debug,
             bucket_storage_uri_class):
  """Sets up the shared state needed for name expansion.

  Args:
    command_name: Name of the command being executed.
    proj_id_handler: ProjectIdHandler for the current command.
    headers: Optional dict of HTTP headers passed through to boto.
    debug: boto connection debug level (0..3).
    bucket_storage_uri_class: StorageUri class to instantiate for cloud
        URIs; injectable so tests can substitute a mock.
  """
  # Keep the caller-supplied configuration verbatim on the instance.
  self.command_name = command_name
  self.proj_id_handler = proj_id_handler
  self.headers = headers
  self.debug = debug
  self.bucket_storage_uri_class = bucket_storage_uri_class
  # Builder that constructs StorageUris with consistent debug/class settings.
  self.suri_builder = StorageUriBuilder(debug, bucket_storage_uri_class)
  # Wildcard selected per listing mode: '**' flattens the whole hierarchy,
  # '*' lists one subdirectory level at a time.
  self._flatness_wildcard = {True: '**', False: '*'}
def test_filter_existing_components_versioned(self):
  """Tests FilterExistingComponents against a versioned bucket.

  Verifies that a component whose uploaded contents still match the local
  file is reused, while a component whose contents no longer match is
  re-uploaded and its stale object version is scheduled for deletion.
  """
  bucket_name = 'filter_existing_components_bucket_versioned'
  bucket_uri = self.CreateVersionedBucket(bucket_name=bucket_name)

  # Already uploaded, contents still match, component still used.
  fpath_uploaded_correctly = self.CreateTempFile(file_name='foo1',
                                                 contents='1')
  key_uploaded_correctly = self.CreateObject(object_name='foo1', contents='1',
                                             bucket_uri=bucket_uri)
  args_uploaded_correctly = PerformResumableUploadIfAppliesArgs(
      fpath_uploaded_correctly, 0, 1, fpath_uploaded_correctly,
      key_uploaded_correctly, key_uploaded_correctly.generation, {})

  # Already uploaded, but contents no longer match.
  fpath_wrong_contents = self.CreateTempFile(file_name='foo4', contents='4')
  key_wrong_contents = self.CreateObject(object_name='foo4', contents='_',
                                         bucket_uri=bucket_uri)
  args_wrong_contents = PerformResumableUploadIfAppliesArgs(
      fpath_wrong_contents, 0, 1, fpath_wrong_contents, key_wrong_contents,
      key_wrong_contents.generation, {})

  dst_args = {fpath_uploaded_correctly: args_uploaded_correctly,
              fpath_wrong_contents: args_wrong_contents}
  existing_components = [ObjectFromTracker(fpath_uploaded_correctly,
                                           key_uploaded_correctly.generation),
                         ObjectFromTracker(fpath_wrong_contents,
                                           key_wrong_contents.generation)]

  suri_builder = StorageUriBuilder(0, BucketStorageUri)

  (components_to_upload, uploaded_components, existing_objects_to_delete) = (
      FilterExistingComponents(dst_args, existing_components,
                               bucket_uri.bucket_name, suri_builder))

  # Only the mismatched component needs (re-)uploading.
  self.assertEqual([args_wrong_contents], components_to_upload)
  # The matching component is reported as already uploaded.
  self.assertEqual(str([args_uploaded_correctly.dst_uri]),
                   str(uploaded_components))
  # The stale version of the mismatched component must be deleted; compare
  # (object_name, generation) pairs since URI objects lack value equality
  # here (assumption based on usage — TODO confirm).
  expected_to_delete = [(args_wrong_contents.dst_uri.object_name,
                         args_wrong_contents.dst_uri.generation)]
  for uri in existing_objects_to_delete:
    self.assertTrue((uri.object_name, uri.generation) in expected_to_delete)
  self.assertEqual(len(expected_to_delete), len(existing_objects_to_delete))
def __init__(self, command_name, proj_id_handler, headers, debug,
             bucket_storage_uri_class, uri_strs, recursion_requested,
             have_existing_dst_container=None, flat=True,
             all_versions=False, for_all_version_delete=False):
  """Initializes the name-expansion iterator state.

  Args:
    command_name: Name of the command being executed.
    proj_id_handler: ProjectIdHandler for the current command.
    headers: Optional dict of HTTP headers passed through to boto.
    debug: boto connection debug level (0..3).
    bucket_storage_uri_class: StorageUri class to instantiate for cloud
        URIs; injectable so tests can substitute a mock.
    uri_strs: PluralityCheckableIterator over the URI strings to expand.
    recursion_requested: True when -R was given on the command line.
    have_existing_dst_container: For copy requests, whether the destination
        is an existing bucket, bucket subdir, or directory; leave as None
        for commands other than cp.
    flat: When True, bucket listings are flattened so results span
        subdirectories; when False, listings go one level at a time.
    all_versions: When True, iterate over every object version.
    for_all_version_delete: When True, expansion is on behalf of an
        all-version delete. (Accepted but not stored here.)

  Behavior examples with flat=True:
    - 'gs://bucket', 'gs://bucket/' and 'gs://bucket/*' each enumerate the
      bucket's top-level objects; 'gs://bucket/**' enumerates everything.
    - 'gs://bucket/abc' enumerates the next-level objects under abc when
      gs://bucket/abc/* matches anything, else just the single name
      gs://bucket/abc; 'gs://bucket/abc/**' enumerates abc's whole subtree.
    - 'file:///tmp' and 'file:///tmp/*' enumerate the files directly under
      /tmp; 'file:///tmp/**' enumerates /tmp's whole subtree.
  With flat=False, 'gs://bucket/abc/*' lists matching objects and subdirs
  but nothing beneath those subdirs.
  """
  # Connection/configuration state, copied straight from the caller.
  self.command_name = command_name
  self.proj_id_handler = proj_id_handler
  self.headers = headers
  self.debug = debug
  self.bucket_storage_uri_class = bucket_storage_uri_class
  # Builder that constructs StorageUris with consistent debug/class settings.
  self.suri_builder = StorageUriBuilder(debug, bucket_storage_uri_class)
  # Expansion inputs and listing-mode switches.
  self.uri_strs = uri_strs
  self.recursion_requested = recursion_requested
  self.have_existing_dst_container = have_existing_dst_container
  self.flat = flat
  self.all_versions = all_versions
  # Wildcard selected per listing mode: '**' flattens the whole hierarchy,
  # '*' lists one subdirectory level at a time.
  self._flatness_wildcard = {True: '**', False: '*'}
class _NameExpansionIterator(object):
  """
  Iterates over all src_uris, expanding wildcards, object-less bucket names,
  subdir bucket names, and directory names, generating a flat listing of all
  the matching objects/files.

  You should instantiate this object using the static factory function
  NameExpansionIterator, because consumers of this iterator need the
  PluralityCheckableIterator wrapper built by that function.

  Yields:
    gslib.name_expansion.NameExpansionResult.

  Raises:
    CommandException: if errors encountered.
  """

  def __init__(self, command_name, proj_id_handler, headers, debug,
               bucket_storage_uri_class, uri_strs, recursion_requested,
               have_existing_dst_container=None, flat=True,
               all_versions=False, for_all_version_delete=False):
    """
    Args:
      command_name: name of command being run.
      proj_id_handler: ProjectIdHandler to use for current command.
      headers: Dictionary containing optional HTTP headers to pass to boto.
      debug: Debug level to pass in to boto connection (range 0..3).
      bucket_storage_uri_class: Class to instantiate for cloud StorageUris.
          Settable for testing/mocking.
      uri_strs: PluralityCheckableIterator of URI strings needing expansion.
      recursion_requested: True if -R specified on command-line.
      have_existing_dst_container: Bool indicator whether this is a copy
          request to an existing bucket, bucket subdir, or directory. Default
          None value should be used in cases where this is not needed
          (commands other than cp).
      flat: Bool indicating whether bucket listings should be flattened, i.e.,
          so the mapped-to results contain objects spanning subdirectories.
      all_versions: Bool indicating whether to iterate over all object
          versions.
      for_all_version_delete: Bool indicating whether this is for an
          all-version delete.

    Examples of _NameExpansionIterator with flat=True:
      - Calling with one of the uri_strs being 'gs://bucket' will enumerate
        all top-level objects, as will 'gs://bucket/' and 'gs://bucket/*'.
      - 'gs://bucket/**' will enumerate all objects in the bucket.
      - 'gs://bucket/abc' will enumerate all next-level objects under
        directory abc (i.e., not including subdirectories of abc) if
        gs://bucket/abc/* matches any objects; otherwise it will enumerate
        the single name gs://bucket/abc
      - 'gs://bucket/abc/**' will enumerate all objects under abc or any of
        its subdirectories.
      - 'file:///tmp' will enumerate all files under /tmp, as will
        'file:///tmp/*'
      - 'file:///tmp/**' will enumerate all files under /tmp or any of its
        subdirectories.

    Example if flat=False: calling with gs://bucket/abc/* lists matching
    objects or subdirs, but not sub-subdirs or objects beneath subdirs.

    Note: In step-by-step comments below we give examples assuming there's a
    gs://bucket with object paths:
      abcd/o1.txt
      abcd/o2.txt
      xyz/o1.txt
      xyz/o2.txt
    and a directory file://dir with file paths:
      dir/a.txt
      dir/b.txt
      dir/c/
    """
    self.command_name = command_name
    self.proj_id_handler = proj_id_handler
    self.headers = headers
    self.debug = debug
    self.bucket_storage_uri_class = bucket_storage_uri_class
    self.suri_builder = StorageUriBuilder(debug, bucket_storage_uri_class)
    self.uri_strs = uri_strs
    self.recursion_requested = recursion_requested
    self.have_existing_dst_container = have_existing_dst_container
    self.flat = flat
    self.all_versions = all_versions
    # NOTE(review): for_all_version_delete is accepted but not stored here —
    # presumably consumed elsewhere; confirm against callers.
    # Map holding wildcard strings to use for flat vs subdir-by-subdir
    # listings. (A flat listing means show all objects expanded all the way
    # down.)
    self._flatness_wildcard = {True: '**', False: '*'}

  def __iter__(self):
    """Generates NameExpansionResults for each input URI string."""
    for uri_str in self.uri_strs:
      # Step 1: Expand any explicitly specified wildcards. The output from
      # this step is an iterator of BucketListingRef.
      # Starting with gs://buck*/abc* this step would expand to
      # gs://bucket/abcd
      if ContainsWildcard(uri_str):
        post_step1_iter = self._WildcardIterator(uri_str)
      else:
        suri = self.suri_builder.StorageUri(uri_str)
        post_step1_iter = iter([BucketListingRef(suri)])
      # Wrap so plurality can be probed below without consuming the iterator.
      post_step1_iter = PluralityCheckableIterator(post_step1_iter)

      # Step 2: Expand bucket subdirs and versions. The output from this
      # step is an iterator of (names_container, BucketListingRef).
      # Starting with gs://bucket/abcd this step would expand to:
      #   iter([(True, abcd/o1.txt), (True, abcd/o2.txt)]).
      if self.flat and self.recursion_requested:
        post_step2_iter = _ImplicitBucketSubdirIterator(
            self, post_step1_iter, self.flat)
      elif self.all_versions:
        post_step2_iter = _AllVersionIterator(self, post_step1_iter,
                                              headers=self.headers)
      else:
        post_step2_iter = _NonContainerTuplifyIterator(post_step1_iter)
      post_step2_iter = PluralityCheckableIterator(post_step2_iter)

      # Step 3. Expand directories and buckets. This step yields the iterated
      # values. Starting with gs://bucket this step would expand to:
      #   [abcd/o1.txt, abcd/o2.txt, xyz/o1.txt, xyz/o2.txt]
      # Starting with file://dir this step would expand to:
      #   [dir/a.txt, dir/b.txt, dir/c/]
      exp_src_bucket_listing_refs = []
      wc = self._flatness_wildcard[self.flat]
      # Plurality must be computed before iterating, since iteration consumes
      # the underlying iterators.
      src_uri_expands_to_multi = (post_step1_iter.has_plurality()
                                  or post_step2_iter.has_plurality())
      is_multi_src_request = (self.uri_strs.has_plurality()
                              or src_uri_expands_to_multi)
      if post_step2_iter.is_empty():
        raise CommandException('No URIs matched: %s' % uri_str)
      for (names_container, blr) in post_step2_iter:
        if (not blr.GetUri().names_container()
            and (self.flat or not blr.HasPrefix())):
          # Leaf object/file: yield it directly.
          yield NameExpansionResult(uri_str, is_multi_src_request,
                                    src_uri_expands_to_multi, names_container,
                                    blr.GetUriString(),
                                    self.have_existing_dst_container,
                                    is_latest=blr.IsLatest())
          continue
        if not self.recursion_requested:
          # Containers are skipped (with a warning) unless -R was given.
          if blr.GetUri().is_file_uri():
            desc = 'directory'
          else:
            desc = 'bucket'
          print 'Omitting %s "%s". (Did you mean to do %s -R?)' % (
              desc, blr.GetUri(), self.command_name)
          continue
        if blr.GetUri().is_file_uri():
          # Convert dir to implicit recursive wildcard.
          uri_to_iterate = '%s/%s' % (blr.GetUriString(), wc)
        else:
          # Convert bucket to implicit recursive wildcard.
          uri_to_iterate = blr.GetUri().clone_replace_name(wc)
        wc_iter = PluralityCheckableIterator(
            self._WildcardIterator(uri_to_iterate))
        # Recompute plurality now that the container has been expanded.
        src_uri_expands_to_multi = (src_uri_expands_to_multi
                                    or wc_iter.has_plurality())
        is_multi_src_request = (self.uri_strs.has_plurality()
                                or src_uri_expands_to_multi)
        for blr in wc_iter:
          yield NameExpansionResult(uri_str, is_multi_src_request,
                                    src_uri_expands_to_multi, True,
                                    blr.GetUriString(),
                                    self.have_existing_dst_container,
                                    is_latest=blr.IsLatest())

  def _WildcardIterator(self, uri_or_str):
    """
    Helper to instantiate gslib.WildcardIterator. Args are same as
    gslib.WildcardIterator interface, but this method fills in most of the
    values from instance state.

    Args:
      uri_or_str: StorageUri or URI string naming wildcard objects to iterate.
    """
    return wildcard_iterator.wildcard_iterator(
        uri_or_str, self.proj_id_handler,
        bucket_storage_uri_class=self.bucket_storage_uri_class,
        headers=self.headers, debug=self.debug,
        all_versions=self.all_versions)
class Command(object):
  """Base class for gsutil command implementations (shared init/validation)."""

  # Subclasses must at minimum supply these command_spec keys.
  REQUIRED_SPEC_KEYS = [COMMAND_NAME]

  # Each subclass must define the following map, minimally including the
  # keys in REQUIRED_SPEC_KEYS; other values below will be used as defaults,
  # although for readbility subclasses should specify the complete map.
  command_spec = {
    # Name of command.
    COMMAND_NAME : None,
    # List of command name aliases.
    COMMAND_NAME_ALIASES : [],
    # Min number of args required by this command.
    MIN_ARGS : 0,
    # Max number of args required by this command, or NO_MAX.
    MAX_ARGS : NO_MAX,
    # Getopt-style string specifying acceptable sub args.
    SUPPORTED_SUB_ARGS : '',
    # True if file URIs are acceptable for this command.
    FILE_URIS_OK : False,
    # True if provider-only URIs are acceptable for this command.
    PROVIDER_URIS_OK : False,
    # Index in args of first URI arg.
    URIS_START_ARG : 0,
    # True if must configure gsutil before running command.
    CONFIG_REQUIRED : True,
  }
  _default_command_spec = command_spec
  help_spec = HelpProvider.help_spec

  """Define an empty test specification, which derived classes must populate.

  This is a list of tuples containing the following values:

    step_name - mnemonic name for test, displayed when test is run
    cmd_line - shell command line to run test
    expect_ret or None - expected return code from test (None means ignore)
    (result_file, expect_file) or None - tuple of result file and expected
      file to diff for additional test verification beyond the return code
      (None means no diff requested)

  Notes:

  - Setting expected_ret to None means there is no expectation and,
    hence, any returned value will pass.

  - Any occurrences of the string 'gsutil' in the cmd_line parameter
    are expanded to the full path to the gsutil command under test.

  - The cmd_line, result_file and expect_file parameters may
    contain the following special substrings:

    $Bn - converted to one of 10 unique-for-testing bucket names (n=0..9)
    $On - converted to one of 10 unique-for-testing object names (n=0..9)
    $Fn - converted to one of 10 unique-for-testing file names (n=0..9)
    $G  - converted to the directory where gsutil is installed. Useful for
          referencing test data.

  - The generated file names are full pathnames, whereas the generated
    bucket and object names are simple relative names.

  - Tests with a non-None result_file and expect_file automatically
    trigger an implicit diff of the two files.

  - These test specifications, in combination with the conversion strings
    allow tests to be constructed parametrically. For example, here's an
    annotated subset of a test_steps for the cp command:

    # Copy local file to object, verify 0 return code.
    ('simple cp', 'gsutil cp $F1 gs://$B1/$O1', 0, None, None),
    # Copy uploaded object back to local file and diff vs. orig file.
    ('verify cp', 'gsutil cp gs://$B1/$O1 $F2', 0, '$F2', '$F1'),

  - After pattern substitution, the specs are run sequentially, in the
    order in which they appear in the test_steps list.
  """
  test_steps = []

  # Define a convenience property for command name, since it's used many
  # places.
  def _GetDefaultCommandName(self):
    return self.command_spec[COMMAND_NAME]
  command_name = property(_GetDefaultCommandName)

  def __init__(self, command_runner, args, headers, debug,
               parallel_operations, config_file_list,
               bucket_storage_uri_class, test_method=None,
               logging_filters=None):
    """
    Args:
      command_runner: CommandRunner (for commands built atop other commands).
      args: Command-line args (arg0 = actual arg, not command name ala bash).
      headers: Dictionary containing optional HTTP headers to pass to boto.
      debug: Debug level to pass in to boto connection (range 0..3).
      parallel_operations: Should command operations be executed in parallel?
      config_file_list: Config file list returned by GetBotoConfigFileList().
      bucket_storage_uri_class: Class to instantiate for cloud StorageUris.
          Settable for testing/mocking.
      test_method: Optional general purpose method for testing purposes.
          Application and semantics of this method will vary by command and
          test type.
      logging_filters: Optional list of logging.Filters to apply to this
          command's logger.

    Implementation note: subclasses shouldn't need to define an __init__
    method, and instead depend on the shared initialization that happens
    here. If you do define an __init__ method in a subclass you'll need to
    explicitly call super().__init__(). But you're encouraged not to do
    this, because it will make changing the __init__ interface more painful.
    """
    # Save class values from constructor params.
    self.command_runner = command_runner
    self.args = args
    self.unparsed_args = args
    self.headers = headers
    self.debug = debug
    self.parallel_operations = parallel_operations
    self.config_file_list = config_file_list
    self.bucket_storage_uri_class = bucket_storage_uri_class
    self.test_method = test_method
    self.exclude_symlinks = False
    self.recursion_requested = False
    self.all_versions = False

    # Global instance of a threaded logger object.
    self.logger = _ThreadedLogger(self.command_name)
    if logging_filters:
      for filter in logging_filters:
        self.logger.addFilter(filter)

    # Process sub-command instance specifications.
    # First, ensure subclass implementation sets all required keys.
    for k in self.REQUIRED_SPEC_KEYS:
      if k not in self.command_spec or self.command_spec[k] is None:
        raise CommandException('"%s" command implementation is missing %s '
                               'specification' % (self.command_name, k))
    # Now override default command_spec with subclass-specified values.
    # NOTE(review): this update mutates the shared _default_command_spec dict
    # in place before rebinding self.command_spec — confirm that's intended.
    tmp = self._default_command_spec
    tmp.update(self.command_spec)
    self.command_spec = tmp
    del tmp

    # Make sure command provides a test specification.
    if not self.test_steps:
      # TODO: Uncomment following lines when test feature is ready.
      #raise CommandException('"%s" command implementation is missing test '
      #'specification' % self.command_name)
      pass

    # Parse and validate args.
    try:
      (self.sub_opts, self.args) = getopt.getopt(
          args, self.command_spec[SUPPORTED_SUB_ARGS])
    except GetoptError, e:
      raise CommandException('%s for "%s" command.' % (e.msg,
                                                       self.command_name))
    if (len(self.args) < self.command_spec[MIN_ARGS]
        or len(self.args) > self.command_spec[MAX_ARGS]):
      raise CommandException('Wrong number of arguments for "%s" command.' %
                             self.command_name)
    if (not self.command_spec[FILE_URIS_OK]
        and self.HaveFileUris(self.args[self.command_spec[URIS_START_ARG]:])):
      raise CommandException('"%s" command does not support "file://" URIs. '
                             'Did you mean to use a gs:// URI?' %
                             self.command_name)
    if (not self.command_spec[PROVIDER_URIS_OK]
        and self._HaveProviderUris(
            self.args[self.command_spec[URIS_START_ARG]:])):
      raise CommandException('"%s" command does not support provider-only '
                             'URIs.' % self.command_name)
    if self.command_spec[CONFIG_REQUIRED]:
      self._ConfigureNoOpAuthIfNeeded()

    self.proj_id_handler = ProjectIdHandler()
    self.suri_builder = StorageUriBuilder(debug, bucket_storage_uri_class)

    # Cross-platform path to run gsutil binary.
    self.gsutil_cmd = ''
    # Cross-platform list containing gsutil path for use with subprocess.
    self.gsutil_exec_list = []
    # If running on Windows, invoke python interpreter explicitly.
    if gslib.util.IS_WINDOWS:
      self.gsutil_cmd += 'python '
      self.gsutil_exec_list += ['python']
    # Add full path to gsutil to make sure we test the correct version.
    self.gsutil_path = gslib.GSUTIL_PATH
    self.gsutil_cmd += self.gsutil_path
    self.gsutil_exec_list += [self.gsutil_path]

    # We're treating recursion_requested like it's used by all commands, but
    # only some of the commands accept the -R option.
    if self.sub_opts:
      for o, unused_a in self.sub_opts:
        if o == '-r' or o == '-R':
          self.recursion_requested = True
          break
def test_filter_existing_components_versioned(self):
  """Tests FilterExistingComponents with a versioned bucket and tracker file.

  Covers three cases: a component uploaded correctly (reused), a duplicate
  tracker-file entry for the same object name (its extra version is deleted),
  and a component whose contents changed (re-uploaded, stale version deleted).
  """
  suri_builder = StorageUriBuilder(0, BucketStorageUri)
  bucket_uri = self.CreateVersionedBucket()
  tracker_file = self.CreateTempFile(file_name='foo', contents='asdf')
  tracker_file_lock = CreateLock()

  # Already uploaded, contents still match, component still used.
  fpath_uploaded_correctly = self.CreateTempFile(file_name='foo1',
                                                 contents='1')
  key_uploaded_correctly = self.CreateObject(object_name='foo1', contents='1',
                                             bucket_uri=bucket_uri)
  args_uploaded_correctly = PerformResumableUploadIfAppliesArgs(
      fpath_uploaded_correctly, 0, 1, fpath_uploaded_correctly,
      key_uploaded_correctly, key_uploaded_correctly.generation, {},
      tracker_file, tracker_file_lock)

  # Duplicate object name in tracker file, but uploaded correctly.
  fpath_duplicate = fpath_uploaded_correctly
  key_duplicate = self.CreateObject(object_name='foo1', contents='1',
                                    bucket_uri=bucket_uri)
  args_duplicate = PerformResumableUploadIfAppliesArgs(
      fpath_duplicate, 0, 1, fpath_duplicate, key_duplicate,
      key_duplicate.generation, {}, tracker_file, tracker_file_lock)
  object_name_duplicate = ObjectFromTracker(
      fpath_duplicate, key_duplicate.generation).object_name
  uri_duplicate = MakeGsUri(bucket_uri.bucket_name, object_name_duplicate,
                            suri_builder)
  uri_duplicate.generation = args_duplicate.dst_uri.generation

  # Already uploaded, but contents no longer match.
  fpath_wrong_contents = self.CreateTempFile(file_name='foo4', contents='4')
  key_wrong_contents = self.CreateObject(object_name='foo4', contents='_',
                                         bucket_uri=bucket_uri)
  args_wrong_contents = PerformResumableUploadIfAppliesArgs(
      fpath_wrong_contents, 0, 1, fpath_wrong_contents, key_wrong_contents,
      key_wrong_contents.generation, {}, tracker_file, tracker_file_lock)

  dst_args = {fpath_uploaded_correctly: args_uploaded_correctly,
              fpath_wrong_contents: args_wrong_contents}
  existing_components = [ObjectFromTracker(fpath_uploaded_correctly,
                                           key_uploaded_correctly.generation),
                         ObjectFromTracker(fpath_duplicate,
                                           key_duplicate.generation),
                         ObjectFromTracker(fpath_wrong_contents,
                                           key_wrong_contents.generation)]

  (components_to_upload, uploaded_components, existing_objects_to_delete) = (
      FilterExistingComponents(dst_args, existing_components,
                               bucket_uri.bucket_name, suri_builder))

  # Only the mismatched component needs (re-)uploading.
  self.assertEqual([args_wrong_contents], components_to_upload)
  # The matching component is reported as already uploaded.
  self.assertEqual(str([args_uploaded_correctly.dst_uri]),
                   str(uploaded_components))
  # Both the stale mismatched version and the duplicate's extra version must
  # be deleted; compare (object_name, generation) pairs.
  expected_to_delete = [(args_wrong_contents.dst_uri.object_name,
                         args_wrong_contents.dst_uri.generation),
                        (uri_duplicate.object_name,
                         args_duplicate.dst_uri.generation)]
  for uri in existing_objects_to_delete:
    self.assertTrue((uri.object_name, uri.generation) in expected_to_delete)
  self.assertEqual(len(expected_to_delete), len(existing_objects_to_delete))
def test_filter_existing_components_non_versioned(self):
  """Tests FilterExistingComponents with a non-versioned bucket.

  Covers: component uploaded correctly (reused); component not yet uploaded
  (uploaded); component whose contents changed (re-uploaded but not deleted,
  since an unversioned overwrite replaces it); component deleted remotely
  (re-uploaded); and a tracker-file entry no longer needed (deleted).
  """
  bucket_uri = self.CreateBucket()
  tracker_file = self.CreateTempFile(file_name='foo', contents='asdf')
  tracker_file_lock = CreateLock()

  # Already uploaded, contents still match, component still used.
  fpath_uploaded_correctly = self.CreateTempFile(file_name='foo1',
                                                 contents='1')
  key_uploaded_correctly = self.CreateObject(object_name='foo1', contents='1',
                                             bucket_uri=bucket_uri)
  args_uploaded_correctly = PerformResumableUploadIfAppliesArgs(
      fpath_uploaded_correctly, 0, 1, fpath_uploaded_correctly,
      key_uploaded_correctly, '', {}, tracker_file, tracker_file_lock)

  # Not yet uploaded, but needed.
  fpath_not_uploaded = self.CreateTempFile(file_name='foo2', contents='2')
  key_not_uploaded = self.CreateObject(object_name='foo2', contents='2',
                                       bucket_uri=bucket_uri)
  args_not_uploaded = PerformResumableUploadIfAppliesArgs(
      fpath_not_uploaded, 0, 1, fpath_not_uploaded, key_not_uploaded, '', {},
      tracker_file, tracker_file_lock)

  # Already uploaded, but contents no longer match. Even though the contents
  # differ, we don't delete this since the bucket is not versioned and it
  # will be overwritten anyway.
  fpath_wrong_contents = self.CreateTempFile(file_name='foo4', contents='4')
  key_wrong_contents = self.CreateObject(object_name='foo4', contents='_',
                                         bucket_uri=bucket_uri)
  args_wrong_contents = PerformResumableUploadIfAppliesArgs(
      fpath_wrong_contents, 0, 1, fpath_wrong_contents, key_wrong_contents,
      '', {}, tracker_file, tracker_file_lock)

  # Exists in tracker file, but component object no longer exists.
  fpath_remote_deleted = self.CreateTempFile(file_name='foo5', contents='5')
  args_remote_deleted = PerformResumableUploadIfAppliesArgs(
      fpath_remote_deleted, 0, 1, fpath_remote_deleted, '', '', {},
      tracker_file, tracker_file_lock)

  # Exists in tracker file and already uploaded, but no longer needed.
  fpath_no_longer_used = self.CreateTempFile(file_name='foo6', contents='6')
  # Key variable is unused below, but the CreateObject side effect matters:
  # the object must exist so it can appear in existing_objects_to_delete.
  key_no_longer_used = self.CreateObject(object_name='foo6', contents='6',
                                         bucket_uri=bucket_uri)

  dst_args = {fpath_uploaded_correctly: args_uploaded_correctly,
              fpath_not_uploaded: args_not_uploaded,
              fpath_wrong_contents: args_wrong_contents,
              fpath_remote_deleted: args_remote_deleted}

  existing_components = [ObjectFromTracker(fpath_uploaded_correctly, ''),
                         ObjectFromTracker(fpath_wrong_contents, ''),
                         ObjectFromTracker(fpath_remote_deleted, ''),
                         ObjectFromTracker(fpath_no_longer_used, '')]

  suri_builder = StorageUriBuilder(0, BucketStorageUri)

  (components_to_upload, uploaded_components, existing_objects_to_delete) = (
      FilterExistingComponents(dst_args, existing_components,
                               bucket_uri.bucket_name, suri_builder))

  for arg in [args_not_uploaded, args_wrong_contents, args_remote_deleted]:
    self.assertTrue(arg in components_to_upload)
  self.assertEqual(str([args_uploaded_correctly.dst_uri]),
                   str(uploaded_components))
  # Only the no-longer-needed component is deleted in an unversioned bucket.
  self.assertEqual(
      str([MakeGsUri(bucket_uri.bucket_name, fpath_no_longer_used,
                     suri_builder)]),
      str(existing_objects_to_delete))
class NameExpansionHandler(object):
  """Expands wildcards, bucket names, subdirs, and directories for commands."""

  def __init__(self, command_name, proj_id_handler, headers, debug,
               bucket_storage_uri_class):
    """
    Args:
      command_name: name of command being run.
      proj_id_handler: ProjectIdHandler to use for current command.
      headers: Dictionary containing optional HTTP headers to pass to boto.
      debug: Debug level to pass in to boto connection (range 0..3).
      bucket_storage_uri_class: Class to instantiate for cloud StorageUris.
          Settable for testing/mocking.
    """
    self.command_name = command_name
    self.proj_id_handler = proj_id_handler
    self.headers = headers
    self.debug = debug
    self.bucket_storage_uri_class = bucket_storage_uri_class
    self.suri_builder = StorageUriBuilder(debug, bucket_storage_uri_class)
    # Map holding wildcard strings to use for flat vs subdir-by-subdir
    # listings. (A flat listing means show all objects expanded all the way
    # down.)
    self._flatness_wildcard = {True: '**', False: '*'}

  def WildcardIterator(self, uri_or_str):
    """
    Helper to instantiate gslib.WildcardIterator. Args are same as
    gslib.WildcardIterator interface, but this method fills in most of the
    values from class state.

    Args:
      uri_or_str: StorageUri or URI string naming wildcard objects to iterate.
    """
    return wildcard_iterator.wildcard_iterator(
        uri_or_str, self.proj_id_handler,
        bucket_storage_uri_class=self.bucket_storage_uri_class,
        headers=self.headers, debug=self.debug)

  def ExpandWildcardsAndContainers(self, uri_strs, recursion_requested,
                                   flat=True):
    """
    Expands wildcards, object-less bucket names, subdir bucket names, and
    directory names, producing a flat listing of all the matching
    objects/files.

    Args:
      uri_strs: List of URI strings needing expansion.
      recursion_requested: True if -R specified on command-line.
      flat: Bool indicating whether bucket listings should be flattened,
          i.e., so the mapped-to results contain objects spanning
          subdirectories.

    Returns:
      gslib.name_expansion.NameExpansionResult.

    Raises:
      CommandException: if errors encountered.

    Examples with flat=True:
      - Calling with one of the uri_strs being 'gs://bucket' will enumerate
        all top-level objects, as will 'gs://bucket/' and 'gs://bucket/*'.
      - 'gs://bucket/**' will enumerate all objects in the bucket.
      - 'gs://bucket/abc' will enumerate all next-level objects under
        directory abc (i.e., not including subdirectories of abc) if
        gs://bucket/abc/* matches any objects; otherwise it will enumerate
        the single name gs://bucket/abc
      - 'gs://bucket/abc/**' will enumerate all objects under abc or any of
        its subdirectories.
      - 'file:///tmp' will enumerate all files under /tmp, as will
        'file:///tmp/*'
      - 'file:///tmp/**' will enumerate all files under /tmp or any of its
        subdirectories.

    Example if flat=False: calling with gs://bucket/abc/* lists matching
    objects or subdirs, but not sub-subdirs or objects beneath subdirs.

    Note: In step-by-step comments below we give examples assuming there's a
    gs://bucket with object paths:
      abcd/o1.txt
      abcd/o2.txt
      xyz/o1.txt
      xyz/o2.txt
    and a directory file://dir with file paths:
      dir/a.txt
      dir/b.txt
      dir/c/
    """
    result = NameExpansionResult()

    for uri_str in uri_strs:
      # Step 1: Expand any explicitly specified wildcards.
      # Starting with gs://buck*/abc* this step would expand to
      # gs://bucket/abcd
      if ContainsWildcard(uri_str):
        post_step1_bucket_listing_refs = list(self.WildcardIterator(uri_str))
      else:
        post_step1_bucket_listing_refs = [
            BucketListingRef(self.suri_builder.StorageUri(uri_str))]

      # Step 2: Expand subdirs.
      # Starting with gs://bucket/abcd this step would expand to:
      #   [abcd/o1.txt, abcd/o2.txt].
      uri_names_container = False
      if flat:
        if recursion_requested:
          post_step2_bucket_listing_refs = []
          for bucket_listing_ref in post_step1_bucket_listing_refs:
            # NOTE(review): uri_names_container is overwritten each loop
            # iteration, so only the last ref's value survives — confirm
            # that's intended.
            (uri_names_container, bucket_listing_refs) = (
                self._DoImplicitBucketSubdirExpansionIfApplicable(
                    bucket_listing_ref.GetUri(), flat))
            post_step2_bucket_listing_refs.extend(bucket_listing_refs)
        else:
          uri_names_container = False
          post_step2_bucket_listing_refs = post_step1_bucket_listing_refs
      else:
        uri_names_container = False
        post_step2_bucket_listing_refs = post_step1_bucket_listing_refs

      # Step 3. Expand directories and buckets.
      # Starting with gs://bucket this step would expand to:
      #   [abcd/o1.txt, abcd/o2.txt, xyz/o1.txt, xyz/o2.txt]
      # Starting with file://dir this step would expand to:
      #   [dir/a.txt, dir/b.txt, dir/c/]
      exp_src_bucket_listing_refs = []
      wc = self._flatness_wildcard[flat]
      for bucket_listing_ref in post_step2_bucket_listing_refs:
        if (not bucket_listing_ref.GetUri().names_container()
            and (flat or not bucket_listing_ref.HasPrefix())):
          # Leaf object/file: keep it as-is.
          exp_src_bucket_listing_refs.append(bucket_listing_ref)
          continue
        if not recursion_requested:
          # Containers are skipped (with a warning) unless -R was given.
          if bucket_listing_ref.GetUri().is_file_uri():
            desc = 'directory'
          else:
            desc = 'bucket'
          print 'Omitting %s "%s". (Did you mean to do %s -R?)' % (
              desc, bucket_listing_ref.GetUri(), self.command_name)
          continue
        uri_names_container = True
        if bucket_listing_ref.GetUri().is_file_uri():
          # Convert dir to implicit recursive wildcard.
          uri_to_iter = '%s/%s' % (bucket_listing_ref.GetUriString(), wc)
        else:
          # Convert bucket to implicit recursive wildcard.
          uri_to_iter = bucket_listing_ref.GetUri().clone_replace_name(wc)
        wildcard_result = list(self.WildcardIterator(uri_to_iter))
        if len(wildcard_result) > 0:
          exp_src_bucket_listing_refs.extend(wildcard_result)
      result._AddExpansion(self.suri_builder.StorageUri(uri_str),
                           uri_names_container,
                           exp_src_bucket_listing_refs)

    return result

  def _DoImplicitBucketSubdirExpansionIfApplicable(self, uri, flat):
    """
    Checks whether uri could be an implicit bucket subdir, and expands if so;
    else returns list containing uri. For example gs://abc would be an
    implicit bucket subdir if the -R option was specified and gs://abc/*
    matches anything.
    Can only be called for -R (recursion requested).

    Args:
      uri: StorageUri.
      flat: bool indicating whether bucket listings should be flattened,
          i.e., so the mapped-to results contain objects spanning
          subdirectories.

    Returns:
      tuple (names_container, [BucketListingRefs to which uri expanded])
        where names_container is true if URI names a directory, bucket, or
        bucket subdir (vs how StorageUri.names_container() doesn't handle
        latter case).
    """
    names_container = False
    result_list = []
    if uri.names_object():
      # URI could be a bucket subdir.
      implicit_subdir_matches = list(self.WildcardIterator(
          self.suri_builder.StorageUri('%s/%s' %
                                       (uri.uri.rstrip('/'),
                                        self._flatness_wildcard[flat]))))
      if len(implicit_subdir_matches) > 0:
        names_container = True
        result_list.extend(implicit_subdir_matches)
      else:
        result_list.append(BucketListingRef(uri))
    else:
      result_list.append(BucketListingRef(uri))
    return (names_container, result_list)

  def StorageUri(self, uri_str):
    """
    Helper to instantiate boto.StorageUri with gsutil default flag values.
    Uses self.bucket_storage_uri_class to support mocking/testing.
    (Identical to the same-named function in command.py; that and this copy
    make it convenient to call StorageUri() with a single argument, from the
    respective classes.)

    Args:
      uri_str: StorageUri naming bucket + optional object.

    Returns:
      boto.StorageUri for given uri_str.

    Raises:
      InvalidUriError: if uri_str not valid.
    """
    return gslib.util.StorageUri(uri_str, self.bucket_storage_uri_class,
                                 self.debug)
def RunCommand(self): if gslib.IS_PACKAGE_INSTALL: raise CommandException( 'Update command is only available for gsutil installed from a ' 'tarball. If you installed gsutil via another method, use the same ' 'method to update it.') is_secure = BOTO_IS_SECURE if not is_secure[0]: raise CommandException( 'Your boto configuration has %s = False. The update command\n' 'cannot be run this way, for security reasons.' % is_secure[1]) self._DisallowUpdataIfDataInGsutilDir() force_update = False no_prompt = False if self.sub_opts: for o, unused_a in self.sub_opts: if o == '-f': force_update = True if o == '-n': no_prompt = True dirs_to_remove = [] tmp_dir = tempfile.mkdtemp() dirs_to_remove.append(tmp_dir) os.chdir(tmp_dir) if not no_prompt: self.logger.info('Checking for software update...') if self.args: update_from_uri_str = self.args[0] if not update_from_uri_str.endswith('.tar.gz'): raise CommandException( 'The update command only works with tar.gz files.') for i, result in enumerate( self.WildcardIterator(update_from_uri_str)): if i > 0: raise CommandException( 'Invalid update URI. Must name a single .tar.gz file.') if result.uri.names_file(): if not force_update: raise CommandException(( '"update" command does not support "file://" URIs without the ' '-f option.')) elif not result.uri.names_object(): raise CommandException( 'Invalid update object URI. Must name a single .tar.gz file.' ) else: update_from_uri_str = GSUTIL_PUB_TARBALL # Try to retrieve version info from tarball metadata; failing that; download # the tarball and extract the VERSION file. The version lookup will fail # when running the update system test, because it retrieves the tarball from # a temp file rather than a cloud URI (files lack the version metadata). 
suri_builder = StorageUriBuilder(self.debug, self.bucket_storage_uri_class) tarball_version = LookUpGsutilVersion( self.suri_builder.StorageUri(update_from_uri_str)) if tarball_version: tf = None else: tf = self._FetchAndOpenGsutilTarball(update_from_uri_str) tf.extractall() with open(os.path.join('gsutil', 'VERSION'), 'r') as ver_file: tarball_version = ver_file.read().strip() if not force_update and gslib.VERSION == tarball_version: self._CleanUpUpdateCommand(tf, dirs_to_remove) if self.args: raise CommandException('You already have %s installed.' % update_from_uri_str, informational=True) else: raise CommandException( 'You already have the latest gsutil release ' 'installed.', informational=True) if not no_prompt: (g, m) = CompareVersions(tarball_version, gslib.VERSION) if m: print('\n'.join( textwrap.wrap( 'This command will update to the "%s" version of gsutil at %s. ' 'NOTE: This a major new version, so it is strongly recommended ' 'that you review the release note details at %s before updating to ' 'this version, especially if you use gsutil in scripts.' % (tarball_version, gslib.GSUTIL_DIR, RELEASE_NOTES_URL)))) else: print( 'This command will update to the "%s" version of\ngsutil at %s' % (tarball_version, gslib.GSUTIL_DIR)) self._ExplainIfSudoNeeded(tf, dirs_to_remove) if no_prompt: answer = 'y' else: answer = raw_input('Proceed? [y/N] ') if not answer or answer.lower()[0] != 'y': self._CleanUpUpdateCommand(tf, dirs_to_remove) raise CommandException('Not running update.', informational=True) if not tf: tf = self._FetchAndOpenGsutilTarball(update_from_uri_str) # Ignore keyboard interrupts during the update to reduce the chance someone # hitting ^C leaves gsutil in a broken state. signal.signal(signal.SIGINT, signal.SIG_IGN) # gslib.GSUTIL_DIR lists the path where the code should end up (like # /usr/local/gsutil), which is one level down from the relative path in the # tarball (since the latter creates files in ./gsutil). 
So, we need to # extract at the parent directory level. gsutil_bin_parent_dir = os.path.normpath( os.path.join(gslib.GSUTIL_DIR, '..')) # Extract tarball to a temporary directory in a sibling to GSUTIL_DIR. old_dir = tempfile.mkdtemp(dir=gsutil_bin_parent_dir) new_dir = tempfile.mkdtemp(dir=gsutil_bin_parent_dir) dirs_to_remove.append(old_dir) dirs_to_remove.append(new_dir) self._EnsureDirsSafeForUpdate(dirs_to_remove) try: tf.extractall(path=new_dir) except Exception, e: self._CleanUpUpdateCommand(tf, dirs_to_remove) raise CommandException('Update failed: %s.' % e)
def _MaybeCheckForAndOfferSoftwareUpdate(self, command_name, debug): """Checks the last time we checked for an update, and if it's been longer than the configured threshold offers the user to update gsutil. Args: command_name: The name of the command being run. debug: Debug level to pass in to boto connection (range 0..3). Returns: True if the user decides to update. """ # Don't try to interact with user if: # - gsutil is not connected to a tty (e.g., if being run from cron); # - user is running gsutil -q # - user is running the update command (which could otherwise cause an # additional note that an update is available when user is already trying # to perform an update); # - user doesn't have credentials configured; or, # - user specified gs_host (which could be a non-production different # service instance, in which case credentials won't work for checking # gsutil tarball). gs_host = boto.config.get('Credentials', 'gs_host', None) if (not sys.stdout.isatty() or not sys.stderr.isatty() or not sys.stdin.isatty() or command_name == 'update' or not logging.getLogger().isEnabledFor(logging.INFO) or not HasConfiguredCredentials() or gs_host): return False software_update_check_period = boto.config.get( 'GSUtil', 'software_update_check_period', 30) # Setting software_update_check_period to 0 means periodic software # update checking is disabled. if software_update_check_period == 0: return False cur_ts = int(time.time()) if not os.path.isfile(LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE): # Set last_checked_ts from date of VERSION file, so if the user installed # an old copy of gsutil it will get noticed (and an update offered) the # first time they try to run it. 
last_checked_ts = int(os.path.getmtime(gslib.VERSION_FILE)) with open(LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE, 'w') as f: f.write(str(last_checked_ts)) else: with open(LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE, 'r') as f: last_checked_ts = int(f.readline()) if (cur_ts - last_checked_ts > software_update_check_period * SECONDS_PER_DAY): suri_builder = StorageUriBuilder(debug, self.bucket_storage_uri_class) cur_ver = LookUpGsutilVersion( suri_builder.StorageUri(GSUTIL_PUB_TARBALL)) with open(LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE, 'w') as f: f.write(str(cur_ts)) if gslib.VERSION != cur_ver: print '\n'.join( textwrap.wrap( 'A newer version of gsutil (%s) is available than the version you ' 'are running (%s). A detailed log of gsutil release changes is ' 'available at gs://pub/gsutil_ReleaseNotes.txt if you would like ' 'to read them before updating.' % (cur_ver, gslib.VERSION), width=78)) if gslib.IS_PACKAGE_INSTALL: return False print answer = raw_input('Would you like to update [Y/n]? ') return not answer or answer.lower()[0] != 'n' return False
def _MaybeCheckForAndOfferSoftwareUpdate(self, command_name, debug):
  """Checks the last time we checked for an update, and if it's been longer
  than the configured threshold offers the user to update gsutil.

  Args:
    command_name: The name of the command being run.
    debug: Debug level to pass in to boto connection (range 0..3).

  Returns:
    True if the user decides to update.
  """
  # Don't try to interact with user if:
  # - gsutil is not connected to a tty (e.g., if being run from cron);
  # - user is running gsutil -q
  # - user is running the config command (which could otherwise attempt to
  #   check for an update for a user running behind a proxy, who has not yet
  #   configured gsutil to go through the proxy; for such users we need the
  #   first connection attempt to be made by the gsutil config command).
  # - user is running the version command (which gets run when using
  #   gsutil -D, which would prevent users with proxy config problems from
  #   sending us gsutil -D output).
  # - user is running the update command (which could otherwise cause an
  #   additional note that an update is available when user is already trying
  #   to perform an update);
  # - user specified gs_host (which could be a non-production different
  #   service instance, in which case credentials won't work for checking
  #   gsutil tarball).
  gs_host = boto.config.get('Credentials', 'gs_host', None)
  if (not IsRunningInteractively()
      or command_name in ('config', 'update', 'ver', 'version')
      or not logging.getLogger().isEnabledFor(logging.INFO)
      or gs_host):
    return False

  # getint coerces the config value to int (see comparison/arithmetic below).
  software_update_check_period = boto.config.getint(
      'GSUtil', 'software_update_check_period', 30)
  # Setting software_update_check_period to 0 means periodic software
  # update checking is disabled.
  if software_update_check_period == 0:
    return False

  cur_ts = int(time.time())
  if not os.path.isfile(LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE):
    # Set last_checked_ts from date of VERSION file, so if the user installed
    # an old copy of gsutil it will get noticed (and an update offered) the
    # first time they try to run it.
    last_checked_ts = GetGsutilVersionModifiedTime()
    with open(LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE, 'w') as f:
      f.write(str(last_checked_ts))
  else:
    try:
      with open(LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE, 'r') as f:
        last_checked_ts = int(f.readline())
    except (TypeError, ValueError):
      # Unreadable/corrupt timestamp file: skip the update check.
      return False

  if (cur_ts - last_checked_ts
      > software_update_check_period * SECONDS_PER_DAY):
    suri_builder = StorageUriBuilder(debug, self.bucket_storage_uri_class)
    cur_ver = LookUpGsutilVersion(
        suri_builder.StorageUri(GSUTIL_PUB_TARBALL))
    # Record the check time regardless of outcome so we don't re-check (and
    # possibly nag) on every subsequent invocation.
    with open(LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE, 'w') as f:
      f.write(str(cur_ts))
    (g, m) = CompareVersions(cur_ver, gslib.VERSION)
    if m:
      # Major version change: stronger warning, and default answer is No.
      print '\n'.join(textwrap.wrap(
          'A newer version of gsutil (%s) is available than the version you '
          'are running (%s). NOTE: This is a major new version, so it is '
          'strongly recommended that you review the release note details at %s '
          'before updating to this version, especially if you use gsutil in '
          'scripts.' % (cur_ver, gslib.VERSION, RELEASE_NOTES_URL)))
      if gslib.IS_PACKAGE_INSTALL:
        # Package installs can't self-update; inform only.
        return False
      print
      answer = raw_input('Would you like to update [y/N]? ')
      return answer and answer.lower()[0] == 'y'
    elif g:
      # Minor version change: default answer is Yes.
      print '\n'.join(textwrap.wrap(
          'A newer version of gsutil (%s) is available than the version you '
          'are running (%s). A detailed log of gsutil release changes is '
          'available at %s if you would like to read them before updating.'
          % (cur_ver, gslib.VERSION, RELEASE_NOTES_URL)))
      if gslib.IS_PACKAGE_INSTALL:
        return False
      print
      answer = raw_input('Would you like to update [Y/n]? ')
      return not answer or answer.lower()[0] != 'n'
  return False