def __init__(self,
             src_url,
             src_obj_size,
             gsutil_api,
             progress_callback=None,
             download_chunk_size=_DEFAULT_DOWNLOAD_CHUNK_SIZE):
  """Initializes the daisy chain wrapper.

  Args:
    src_url: Source CloudUrl to copy from.
    src_obj_size: Size of source object.
    gsutil_api: gsutil Cloud API to use for the copy.
    progress_callback: Optional callback function for progress notifications
        for the download thread. Receives calls with arguments
        (bytes_transferred, total_size).
    download_chunk_size: Integer number of bytes to download per
        GetObjectMedia request. This is the upper bound of bytes that may be
        unnecessarily downloaded if there is a break in the resumable upload.
  """
  # Current read position for the upload file pointer.
  self.position = 0
  # FIFO of downloaded chunks awaiting consumption by the upload side.
  self.buffer = deque()
  self.bytes_buffered = 0
  # Maximum amount of bytes in memory at a time.
  self.max_buffer_size = 1024 * 1024  # 1 MiB
  self._download_chunk_size = download_chunk_size
  # We save one buffer's worth of data as a special case for boto,
  # which seeks back one buffer and rereads to compute hashes. This is
  # unnecessary because we can just compare cloud hash digests at the end,
  # but it allows this to work without modifying boto.
  self.last_position = 0
  self.last_data = None
  # Protects buffer, position, bytes_buffered, last_position, and last_data.
  self.lock = CreateLock()
  # Protects download_exception.
  self.download_exception_lock = CreateLock()
  self.src_obj_size = src_obj_size
  self.src_url = src_url
  # This is safe to use the upload and download thread because the download
  # thread calls only GetObjectMedia, which creates a new HTTP connection
  # independent of gsutil_api. Thus, it will not share an HTTP connection
  # with the upload.
  self.gsutil_api = gsutil_api
  # If self.download_thread dies due to an exception, it is saved here so
  # that it can also be raised in the upload thread.
  self.download_exception = None
  self.download_thread = None
  self.progress_callback = progress_callback
  # Event used to signal the download thread to halt.
  self.stop_download = threading.Event()
  self.StartDownloadThread(progress_callback=self.progress_callback)
def testWriteComponentToParallelUploadTrackerFile(self):
  """Tests tracker-file rejection of a mismatched encryption key.

  Writes a parallel upload tracker file under one encryption key, then
  attempts to append a component with no key; this must raise a
  CommandException mentioning the key mismatch.
  """
  tracker_file_lock = CreateLock()
  fpath = self.CreateTempFile(file_name='foo')
  random_prefix = '123'
  enc_key = '456'
  objects = [
      ObjectFromTracker('obj1', '42'),
      ObjectFromTracker('obj2', '314159')
  ]
  WriteParallelUploadTrackerFile(fpath, random_prefix, objects,
                                 encryption_key_sha256=enc_key)
  new_object = ObjectFromTracker('obj3', '43')
  try:
    WriteComponentToParallelUploadTrackerFile(
        fpath,
        tracker_file_lock,
        new_object,
        self.logger,
        encryption_key_sha256=None)
    self.fail('Expected CommandException due to different encryption key')
  # 'except X as e' replaces the Python-2-only 'except X, e' form; it is
  # valid in Python 2.6+ and required for Python 3 compatibility.
  except CommandException as e:
    self.assertIn('does not match encryption key', str(e))
def test_ParseEmptyParallelUploadTrackerFile(self):
  """Tests _ParseParallelUploadTrackerFile with an empty tracker file."""
  # An empty tracker file must parse to zero component objects, while the
  # parser still supplies a non-None prefix.
  empty_tracker_path = self.CreateTempFile(file_name='foo', contents='')
  lock = CreateLock()
  prefix, parsed_objects = _ParseParallelUploadTrackerFile(
      empty_tracker_path, lock)
  self.assertEqual(parsed_objects, [])
  self.assertIsNotNone(prefix)
def InitializeMultiprocessingVariables():
  """Performs necessary module-level initialization.

  See gslib.command.InitializeMultiprocessingVariables for an explanation
  of why this is necessary.
  """
  global token_exchange_lock
  # Lock used for checking/exchanging refresh token so that a parallelized
  # operation doesn't attempt concurrent refreshes.
  token_exchange_lock = CreateLock()
def test_ParseParallelUploadTrackerFile(self):
  """Tests _ParseParallelUploadTrackerFile with a populated tracker file."""
  tracker_file_lock = CreateLock()
  random_prefix = '123'
  # Flat list of alternating (object name, generation) values, one value
  # per tracker-file line after the prefix line.
  objects = ['obj1', '42', 'obj2', '314159']
  contents = '\n'.join([random_prefix] + objects)
  fpath = self.CreateTempFile(file_name='foo', contents=contents)
  # Pair the values up. Floor division (//) keeps the index integral under
  # both Python 2 and Python 3 (plain / is float division in Python 3).
  expected_objects = [
      ObjectFromTracker(objects[2 * i], objects[2 * i + 1])
      for i in range(0, len(objects) // 2)
  ]
  (actual_prefix, actual_objects) = _ParseParallelUploadTrackerFile(
      fpath, tracker_file_lock)
  self.assertEqual(random_prefix, actual_prefix)
  self.assertEqual(expected_objects, actual_objects)
def test_CreateParallelUploadTrackerFile(self):
  """Tests the _CreateParallelUploadTrackerFile function."""
  tracker_file = self.CreateTempFile(file_name='foo', contents='asdf')
  tracker_file_lock = CreateLock()
  random_prefix = '123'
  objects = ['obj1', '42', 'obj2', '314159']
  expected_contents = [random_prefix] + objects
  # Pair up (object name, generation) values. Floor division (//) keeps the
  # index integral under both Python 2 and Python 3.
  objects = [
      ObjectFromTracker(objects[2 * i], objects[2 * i + 1])
      for i in range(0, len(objects) // 2)
  ]
  _CreateParallelUploadTrackerFile(tracker_file, random_prefix, objects,
                                   tracker_file_lock)
  with open(tracker_file, 'rb') as f:
    lines = f.read().splitlines()
  self.assertEqual(expected_contents, lines)
def test_AppendComponentTrackerToParallelUploadTrackerFile(self):
  """Tests the _CreateParallelUploadTrackerFile function with append."""
  tracker_file = self.CreateTempFile(file_name='foo', contents='asdf')
  tracker_file_lock = CreateLock()
  random_prefix = '123'
  objects = ['obj1', '42', 'obj2', '314159']
  expected_contents = [random_prefix] + objects
  # Pair up (object name, generation) values. Floor division (//) keeps the
  # index integral under both Python 2 and Python 3.
  objects = [
      ObjectFromTracker(objects[2 * i], objects[2 * i + 1])
      for i in range(0, len(objects) // 2)
  ]
  _CreateParallelUploadTrackerFile(tracker_file, random_prefix, objects,
                                   tracker_file_lock)
  # Appending one more component should add exactly its two lines.
  new_object = ['obj2', '1234']
  expected_contents += new_object
  new_object = ObjectFromTracker(new_object[0], new_object[1])
  _AppendComponentTrackerToParallelUploadTrackerFile(
      tracker_file, new_object, tracker_file_lock)
  with open(tracker_file, 'rb') as f:
    lines = f.read().splitlines()
  self.assertEqual(expected_contents, lines)
def __init__(self, src_url, src_obj_size, gsutil_api): """Initializes the daisy chain wrapper. Args: src_url: Source CloudUrl to copy from. src_obj_size: Size of source object. gsutil_api: gsutil Cloud API to use for the copy. """ # Current read position for the upload file pointer. self.position = 0 self.buffer = deque() self.bytes_buffered = 0 self.max_buffer_size = 1024 * 1024 # 1 MB # We save one buffer's worth of data as a special case for boto, # which seeks back one buffer and rereads to compute hashes. This is # unnecessary because we can just compare cloud hash digests at the end, # but it allows this to work without modfiying boto. self.last_position = 0 self.last_data = None # Protects buffer, position, bytes_buffered, last_position, and last_data. self.lock = CreateLock() self.src_obj_size = src_obj_size self.src_url = src_url # This is safe to use the upload and download thread because the download # thread calls only GetObjectMedia, which creates a new HTTP connection # independent of gsutil_api. Thus, it will not share an HTTP connection # with the upload. self.gsutil_api = gsutil_api self.download_thread = None self.stop_download = threading.Event() self.StartDownloadThread()
def test_FilterExistingComponentsNonVersioned(self):
  """Tests upload with a variety of component states."""
  mock_api = MockCloudApi()
  bucket_name = self.MakeTempName('bucket')
  tracker_file = self.CreateTempFile(file_name='foo', contents='asdf')
  tracker_file_lock = CreateLock()
  # dst_obj_metadata used for passing content-type.
  empty_object = apitools_messages.Object()

  # Already uploaded, contents still match, component still used.
  fpath_uploaded_correctly = self.CreateTempFile(file_name='foo1',
                                                 contents='1')
  fpath_uploaded_correctly_url = StorageUrlFromString(
      str(fpath_uploaded_correctly))
  object_uploaded_correctly_url = StorageUrlFromString(
      '%s://%s/%s' % (self.default_provider, bucket_name,
                      fpath_uploaded_correctly))
  with open(fpath_uploaded_correctly) as f_in:
    fpath_uploaded_correctly_md5 = CalculateB64EncodedMd5FromContents(f_in)
  mock_api.MockCreateObjectWithMetadata(
      apitools_messages.Object(bucket=bucket_name,
                               name=fpath_uploaded_correctly,
                               md5Hash=fpath_uploaded_correctly_md5),
      contents='1')
  args_uploaded_correctly = PerformParallelUploadFileToObjectArgs(
      fpath_uploaded_correctly, 0, 1, fpath_uploaded_correctly_url,
      object_uploaded_correctly_url, '', empty_object, tracker_file,
      tracker_file_lock, None)

  # Not yet uploaded, but needed.
  fpath_not_uploaded = self.CreateTempFile(file_name='foo2', contents='2')
  fpath_not_uploaded_url = StorageUrlFromString(str(fpath_not_uploaded))
  object_not_uploaded_url = StorageUrlFromString(
      '%s://%s/%s' % (self.default_provider, bucket_name,
                      fpath_not_uploaded))
  args_not_uploaded = PerformParallelUploadFileToObjectArgs(
      fpath_not_uploaded, 0, 1, fpath_not_uploaded_url,
      object_not_uploaded_url, '', empty_object, tracker_file,
      tracker_file_lock, None)

  # Already uploaded, but contents no longer match. Even though the contents
  # differ, we don't delete this since the bucket is not versioned and it
  # will be overwritten anyway.
  fpath_wrong_contents = self.CreateTempFile(file_name='foo4', contents='4')
  fpath_wrong_contents_url = StorageUrlFromString(str(fpath_wrong_contents))
  object_wrong_contents_url = StorageUrlFromString(
      '%s://%s/%s' % (self.default_provider, bucket_name,
                      fpath_wrong_contents))
  # The md5 is computed from a throwaway file with different contents so the
  # stored hash will not match the local component's contents.
  with open(self.CreateTempFile(contents='_')) as f_in:
    fpath_wrong_contents_md5 = CalculateB64EncodedMd5FromContents(f_in)
  mock_api.MockCreateObjectWithMetadata(
      apitools_messages.Object(bucket=bucket_name,
                               name=fpath_wrong_contents,
                               md5Hash=fpath_wrong_contents_md5),
      contents='1')
  args_wrong_contents = PerformParallelUploadFileToObjectArgs(
      fpath_wrong_contents, 0, 1, fpath_wrong_contents_url,
      object_wrong_contents_url, '', empty_object, tracker_file,
      tracker_file_lock, None)

  # Exists in tracker file, but component object no longer exists.
  fpath_remote_deleted = self.CreateTempFile(file_name='foo5', contents='5')
  fpath_remote_deleted_url = StorageUrlFromString(str(fpath_remote_deleted))
  args_remote_deleted = PerformParallelUploadFileToObjectArgs(
      fpath_remote_deleted, 0, 1, fpath_remote_deleted_url, '', '',
      empty_object, tracker_file, tracker_file_lock, None)

  # Exists in tracker file and already uploaded, but no longer needed.
  fpath_no_longer_used = self.CreateTempFile(file_name='foo6', contents='6')
  with open(fpath_no_longer_used) as f_in:
    file_md5 = CalculateB64EncodedMd5FromContents(f_in)
  mock_api.MockCreateObjectWithMetadata(
      apitools_messages.Object(bucket=bucket_name, name='foo6',
                               md5Hash=file_md5),
      contents='6')

  dst_args = {
      fpath_uploaded_correctly: args_uploaded_correctly,
      fpath_not_uploaded: args_not_uploaded,
      fpath_wrong_contents: args_wrong_contents,
      fpath_remote_deleted: args_remote_deleted
  }
  existing_components = [
      ObjectFromTracker(fpath_uploaded_correctly, ''),
      ObjectFromTracker(fpath_wrong_contents, ''),
      ObjectFromTracker(fpath_remote_deleted, ''),
      ObjectFromTracker(fpath_no_longer_used, '')
  ]
  bucket_url = StorageUrlFromString('%s://%s' % (self.default_provider,
                                                 bucket_name))
  (components_to_upload, uploaded_components,
   existing_objects_to_delete) = (FilterExistingComponents(
       dst_args, existing_components, bucket_url, mock_api))

  # Everything except the correctly-uploaded component must be (re)uploaded.
  for arg in [args_not_uploaded, args_wrong_contents, args_remote_deleted]:
    self.assertTrue(arg in components_to_upload)
  self.assertEqual(1, len(uploaded_components))
  self.assertEqual(args_uploaded_correctly.dst_url.url_string,
                   uploaded_components[0].url_string)
  # Only the no-longer-needed component should be scheduled for deletion.
  self.assertEqual(1, len(existing_objects_to_delete))
  no_longer_used_url = StorageUrlFromString(
      '%s://%s/%s' % (self.default_provider, bucket_name,
                      fpath_no_longer_used))
  self.assertEqual(no_longer_used_url.url_string,
                   existing_objects_to_delete[0].url_string)
def test_FilterExistingComponentsVersioned(self):
  """Tests upload with versioned parallel components."""
  mock_api = MockCloudApi()
  bucket_name = self.MakeTempName('bucket')
  mock_api.MockCreateVersionedBucket(bucket_name)
  # dst_obj_metadata used for passing content-type.
  empty_object = apitools_messages.Object()
  tracker_file = self.CreateTempFile(file_name='foo', contents='asdf')
  tracker_file_lock = CreateLock()

  # Already uploaded, contents still match, component still used.
  fpath_uploaded_correctly = self.CreateTempFile(file_name='foo1',
                                                 contents='1')
  fpath_uploaded_correctly_url = StorageUrlFromString(
      str(fpath_uploaded_correctly))
  with open(fpath_uploaded_correctly) as f_in:
    fpath_uploaded_correctly_md5 = CalculateB64EncodedMd5FromContents(f_in)
  object_uploaded_correctly = mock_api.MockCreateObjectWithMetadata(
      apitools_messages.Object(bucket=bucket_name,
                               name=fpath_uploaded_correctly,
                               md5Hash=fpath_uploaded_correctly_md5),
      contents='1')
  object_uploaded_correctly_url = StorageUrlFromString(
      '%s://%s/%s#%s' % (self.default_provider, bucket_name,
                         fpath_uploaded_correctly,
                         object_uploaded_correctly.generation))
  args_uploaded_correctly = PerformParallelUploadFileToObjectArgs(
      fpath_uploaded_correctly, 0, 1, fpath_uploaded_correctly_url,
      object_uploaded_correctly_url, object_uploaded_correctly.generation,
      empty_object, tracker_file, tracker_file_lock, None)

  # Duplicate object name in tracker file, but uploaded correctly.
  # Creating the same object again yields a second generation in the
  # versioned bucket.
  fpath_duplicate = fpath_uploaded_correctly
  fpath_duplicate_url = StorageUrlFromString(str(fpath_duplicate))
  duplicate_uploaded_correctly = mock_api.MockCreateObjectWithMetadata(
      apitools_messages.Object(bucket=bucket_name, name=fpath_duplicate,
                               md5Hash=fpath_uploaded_correctly_md5),
      contents='1')
  duplicate_uploaded_correctly_url = StorageUrlFromString(
      '%s://%s/%s#%s' % (self.default_provider, bucket_name,
                         fpath_uploaded_correctly,
                         duplicate_uploaded_correctly.generation))
  args_duplicate = PerformParallelUploadFileToObjectArgs(
      fpath_duplicate, 0, 1, fpath_duplicate_url,
      duplicate_uploaded_correctly_url,
      duplicate_uploaded_correctly.generation, empty_object, tracker_file,
      tracker_file_lock, None)

  # Already uploaded, but contents no longer match.
  fpath_wrong_contents = self.CreateTempFile(file_name='foo4', contents='4')
  fpath_wrong_contents_url = StorageUrlFromString(str(fpath_wrong_contents))
  # md5 computed from a throwaway file so it differs from the local contents.
  with open(self.CreateTempFile(contents='_')) as f_in:
    fpath_wrong_contents_md5 = CalculateB64EncodedMd5FromContents(f_in)
  object_wrong_contents = mock_api.MockCreateObjectWithMetadata(
      apitools_messages.Object(bucket=bucket_name,
                               name=fpath_wrong_contents,
                               md5Hash=fpath_wrong_contents_md5),
      contents='_')
  wrong_contents_url = StorageUrlFromString(
      '%s://%s/%s#%s' % (self.default_provider, bucket_name,
                         fpath_wrong_contents,
                         object_wrong_contents.generation))
  args_wrong_contents = PerformParallelUploadFileToObjectArgs(
      fpath_wrong_contents, 0, 1, fpath_wrong_contents_url,
      wrong_contents_url, '', empty_object, tracker_file,
      tracker_file_lock, None)

  dst_args = {
      fpath_uploaded_correctly: args_uploaded_correctly,
      fpath_wrong_contents: args_wrong_contents
  }
  existing_components = [
      ObjectFromTracker(fpath_uploaded_correctly,
                        object_uploaded_correctly_url.generation),
      ObjectFromTracker(fpath_duplicate,
                        duplicate_uploaded_correctly_url.generation),
      ObjectFromTracker(fpath_wrong_contents,
                        wrong_contents_url.generation)
  ]
  bucket_url = StorageUrlFromString('%s://%s' % (self.default_provider,
                                                 bucket_name))
  (components_to_upload, uploaded_components,
   existing_objects_to_delete) = (FilterExistingComponents(
       dst_args, existing_components, bucket_url, mock_api))

  # Only the component whose contents changed needs re-uploading.
  self.assertEqual([args_wrong_contents], components_to_upload)
  self.assertEqual(args_uploaded_correctly.dst_url.url_string,
                   uploaded_components[0].url_string)
  # Both the stale generation and the duplicate generation should be deleted.
  expected_to_delete = [(args_wrong_contents.dst_url.object_name,
                         args_wrong_contents.dst_url.generation),
                        (args_duplicate.dst_url.object_name,
                         args_duplicate.dst_url.generation)]
  for uri in existing_objects_to_delete:
    self.assertTrue((uri.object_name, uri.generation) in expected_to_delete)
  self.assertEqual(len(expected_to_delete), len(existing_objects_to_delete))
def main():
  """Configures the environment and runs the requested gsutil command."""
  InitializeSignalHandling()
  # Any modules used in initializing multiprocessing variables must be
  # imported after importing gslib.__main__.
  # pylint: disable=redefined-outer-name,g-import-not-at-top
  import gslib.boto_translation
  import gslib.command
  import gslib.util
  from gslib.util import BOTO_IS_SECURE
  from gslib.util import CERTIFICATE_VALIDATION_ENABLED
  # pylint: disable=unused-variable
  from gcs_oauth2_boto_plugin import oauth2_client
  from apitools.base.py import credentials_lib
  # pylint: enable=unused-variable
  from gslib.util import CheckMultiprocessingAvailableAndInit
  if CheckMultiprocessingAvailableAndInit().is_available:
    # These setup methods must be called, and, on Windows, they can only be
    # called from within an "if __name__ == '__main__':" block.
    gslib.command.InitializeMultiprocessingVariables()
    gslib.boto_translation.InitializeMultiprocessingVariables()
  else:
    gslib.command.InitializeThreadingVariables()

  # This needs to be done after gslib.util.InitializeMultiprocessingVariables(),
  # since otherwise we can't call gslib.util.CreateLock.
  try:
    # pylint: disable=unused-import,g-import-not-at-top
    import gcs_oauth2_boto_plugin
    gsutil_client_id, gsutil_client_secret = GetGsutilClientIdAndSecret()
    gcs_oauth2_boto_plugin.oauth2_helper.SetFallbackClientIdAndSecret(
        gsutil_client_id, gsutil_client_secret)
    gcs_oauth2_boto_plugin.oauth2_helper.SetLock(CreateLock())
    credentials_lib.SetCredentialsCacheFileLock(CreateLock())
  except ImportError:
    # The oauth2 plugin is optional; proceed without credential setup.
    pass

  global debug
  global test_exception_traces

  if not (2, 7) <= sys.version_info[:3] < (3,):
    raise CommandException('gsutil requires python 2.7.')

  # In gsutil 4.0 and beyond, we don't use the boto library for the JSON
  # API. However, we still store gsutil configuration data in the .boto
  # config file for compatibility with previous versions and user convenience.
  # Many users have a .boto configuration file from previous versions, and it
  # is useful to have all of the configuration for gsutil stored in one place.
  command_runner = CommandRunner()
  if not BOTO_IS_SECURE:
    raise CommandException('\n'.join(textwrap.wrap(
        'Your boto configuration has is_secure = False. Gsutil cannot be '
        'run this way, for security reasons.')))

  headers = {}
  parallel_operations = False
  quiet = False
  version = False
  debug = 0
  trace_token = None
  perf_trace_token = None
  test_exception_traces = False

  # If user enters no commands just print the usage info.
  if len(sys.argv) == 1:
    sys.argv.append('help')

  # Change the default of the 'https_validate_certificates' boto option to
  # True (it is currently False in boto).
  if not boto.config.has_option('Boto', 'https_validate_certificates'):
    if not boto.config.has_section('Boto'):
      boto.config.add_section('Boto')
    boto.config.setbool('Boto', 'https_validate_certificates', True)

  gslib.util.configured_certs_file = gslib.util.ConfigureCertsFile()
  for signal_num in GetCaughtSignals():
    RegisterSignalHandler(signal_num, _CleanupSignalHandler)
  GetCertsFile()

  try:
    try:
      opts, args = getopt.getopt(sys.argv[1:], 'dDvo:h:mq',
                                 ['debug', 'detailedDebug', 'version',
                                  'option', 'help', 'header',
                                  'multithreaded', 'quiet',
                                  'testexceptiontraces', 'trace-token=',
                                  'perf-trace-token='])
    except getopt.GetoptError as e:
      _HandleCommandException(CommandException(e.msg))
    for o, a in opts:
      if o in ('-d', '--debug'):
        # Also causes boto to include httplib header output.
        debug = DEBUGLEVEL_DUMP_REQUESTS
      elif o in ('-D', '--detailedDebug'):
        # We use debug level 3 to ask gsutil code to output more detailed
        # debug output. This is a bit of a hack since it overloads the same
        # flag that was originally implemented for boto use. And we use -DD
        # to ask for really detailed debugging (i.e., including HTTP payload).
        if debug == DEBUGLEVEL_DUMP_REQUESTS:
          debug = DEBUGLEVEL_DUMP_REQUESTS_AND_PAYLOADS
        else:
          debug = DEBUGLEVEL_DUMP_REQUESTS
      elif o in ('-?', '--help'):
        _OutputUsageAndExit(command_runner)
      elif o in ('-h', '--header'):
        (hdr_name, _, hdr_val) = a.partition(':')
        if not hdr_name:
          _OutputUsageAndExit(command_runner)
        headers[hdr_name.lower()] = hdr_val
      elif o in ('-m', '--multithreaded'):
        parallel_operations = True
      elif o in ('-q', '--quiet'):
        quiet = True
      elif o in ('-v', '--version'):
        version = True
      elif o == '--perf-trace-token':
        perf_trace_token = a
      elif o == '--trace-token':
        trace_token = a
      elif o == '--testexceptiontraces':
        # Hidden flag for integration tests.
        test_exception_traces = True
        # Avoid printing extra warnings to stderr regarding long retries by
        # setting the threshold very high.
        gslib.util.LONG_RETRY_WARN_SEC = 3600
      elif o in ('-o', '--option'):
        (opt_section_name, _, opt_value) = a.partition('=')
        if not opt_section_name:
          _OutputUsageAndExit(command_runner)
        (opt_section, _, opt_name) = opt_section_name.partition(':')
        if not opt_section or not opt_name:
          _OutputUsageAndExit(command_runner)
        if not boto.config.has_section(opt_section):
          boto.config.add_section(opt_section)
        boto.config.set(opt_section, opt_name, opt_value)
    metrics.LogCommandParams(global_opts=opts)
    httplib2.debuglevel = debug
    if trace_token:
      sys.stderr.write(TRACE_WARNING)
    if debug >= DEBUGLEVEL_DUMP_REQUESTS:
      sys.stderr.write(DEBUG_WARNING)
      _ConfigureLogging(level=logging.DEBUG)
      command_runner.RunNamedCommand('ver', ['-l'])
      config_items = []
      try:
        config_items.extend(boto.config.items('Boto'))
        config_items.extend(boto.config.items('GSUtil'))
      except ConfigParser.NoSectionError:
        pass
      # Redact sensitive config values before echoing them to stderr.
      for i in xrange(len(config_items)):
        config_item_key = config_items[i][0]
        if config_item_key in CONFIG_KEYS_TO_REDACT:
          config_items[i] = (config_item_key, 'REDACTED')
      sys.stderr.write('Command being run: %s\n' % ' '.join(sys.argv))
      sys.stderr.write('config_file_list: %s\n' % GetBotoConfigFileList())
      sys.stderr.write('config: %s\n' % str(config_items))
    elif quiet:
      _ConfigureLogging(level=logging.WARNING)
    else:
      _ConfigureLogging(level=logging.INFO)
      # oauth2client uses info logging in places that would better
      # correspond to gsutil's debug logging (e.g., when refreshing
      # access tokens).
      oauth2client.client.logger.setLevel(logging.WARNING)

    if not CERTIFICATE_VALIDATION_ENABLED:
      sys.stderr.write(HTTP_WARNING)

    if version:
      command_name = 'version'
    elif not args:
      command_name = 'help'
    else:
      command_name = args[0]

    _CheckAndWarnForProxyDifferences()

    if os.environ.get('_ARGCOMPLETE', '0') == '1':
      return _PerformTabCompletion(command_runner)

    return _RunNamedCommandAndHandleExceptions(
        command_runner, command_name, args=args[1:], headers=headers,
        debug_level=debug, trace_token=trace_token,
        parallel_operations=parallel_operations,
        perf_trace_token=perf_trace_token)
  finally:
    _Cleanup()
def main():
  """Configures the environment and runs the requested gsutil command."""
  # Any modules used in initializing multiprocessing variables must be
  # imported after importing gslib.__main__.
  # pylint: disable=redefined-outer-name,g-import-not-at-top
  import gslib.boto_translation
  import gslib.command
  import gslib.util
  from gslib.util import BOTO_IS_SECURE
  from gslib.util import CERTIFICATE_VALIDATION_ENABLED
  from gcs_oauth2_boto_plugin import oauth2_client
  from gslib.util import MultiprocessingIsAvailable
  if MultiprocessingIsAvailable()[0]:
    # These setup methods must be called, and, on Windows, they can only be
    # called from within an "if __name__ == '__main__':" block.
    gslib.util.InitializeMultiprocessingVariables()
    gslib.command.InitializeMultiprocessingVariables()
    gslib.boto_translation.InitializeMultiprocessingVariables()

  # This needs to be done after gslib.util.InitializeMultiprocessingVariables(),
  # since otherwise we can't call gslib.util.CreateLock.
  try:
    # pylint: disable=unused-import,g-import-not-at-top
    import gcs_oauth2_boto_plugin
    gcs_oauth2_boto_plugin.oauth2_helper.SetFallbackClientIdAndSecret(
        GSUTIL_CLIENT_ID, GSUTIL_CLIENT_NOTSOSECRET)
    gcs_oauth2_boto_plugin.oauth2_helper.SetLock(CreateLock())
  except ImportError:
    # The oauth2 plugin is optional; proceed without credential setup.
    pass

  global debug
  global test_exception_traces

  if not (2, 6) <= sys.version_info[:3] < (3,):
    raise gslib.exception.CommandException(
        'gsutil requires python 2.6 or 2.7.')

  # In gsutil 4.0 and beyond, we don't use the boto library for the JSON
  # API. However, we still store gsutil configuration data in the .boto
  # config file for compatibility with previous versions and user convenience.
  # Many users have a .boto configuration file from previous versions, and it
  # is useful to have all of the configuration for gsutil stored in one place.
  command_runner = CommandRunner()
  if not BOTO_IS_SECURE:
    raise CommandException('\n'.join(textwrap.wrap(
        'Your boto configuration has is_secure = False. Gsutil cannot be '
        'run this way, for security reasons.')))

  headers = {}
  parallel_operations = False
  quiet = False
  version = False
  debug = 0
  test_exception_traces = False

  # If user enters no commands just print the usage info.
  if len(sys.argv) == 1:
    sys.argv.append('help')

  # Change the default of the 'https_validate_certificates' boto option to
  # True (it is currently False in boto).
  if not boto.config.has_option('Boto', 'https_validate_certificates'):
    if not boto.config.has_section('Boto'):
      boto.config.add_section('Boto')
    boto.config.setbool('Boto', 'https_validate_certificates', True)

  GetCertsFile()

  try:
    try:
      opts, args = getopt.getopt(sys.argv[1:], 'dDvo:h:mq',
                                 ['debug', 'detailedDebug', 'version',
                                  'option', 'help', 'header',
                                  'multithreaded', 'quiet',
                                  'testexceptiontraces'])
    except getopt.GetoptError as e:
      _HandleCommandException(gslib.exception.CommandException(e.msg))
    for o, a in opts:
      if o in ('-d', '--debug'):
        # Passing debug=2 causes boto to include httplib header output.
        debug = 3
      elif o in ('-D', '--detailedDebug'):
        # We use debug level 3 to ask gsutil code to output more detailed
        # debug output. This is a bit of a hack since it overloads the same
        # flag that was originally implemented for boto use. And we use -DD
        # to ask for really detailed debugging (i.e., including HTTP payload).
        if debug == 3:
          debug = 4
        else:
          debug = 3
      elif o in ('-?', '--help'):
        _OutputUsageAndExit(command_runner)
      elif o in ('-h', '--header'):
        (hdr_name, _, hdr_val) = a.partition(':')
        if not hdr_name:
          _OutputUsageAndExit(command_runner)
        headers[hdr_name.lower()] = hdr_val
      elif o in ('-m', '--multithreaded'):
        parallel_operations = True
      elif o in ('-q', '--quiet'):
        quiet = True
      elif o in ('-v', '--version'):
        version = True
      elif o == '--testexceptiontraces':
        # Hidden flag for integration tests.
        test_exception_traces = True
      elif o in ('-o', '--option'):
        (opt_section_name, _, opt_value) = a.partition('=')
        if not opt_section_name:
          _OutputUsageAndExit(command_runner)
        (opt_section, _, opt_name) = opt_section_name.partition(':')
        if not opt_section or not opt_name:
          _OutputUsageAndExit(command_runner)
        if not boto.config.has_section(opt_section):
          boto.config.add_section(opt_section)
        boto.config.set(opt_section, opt_name, opt_value)
    httplib2.debuglevel = debug
    if debug > 1:
      sys.stderr.write(DEBUG_WARNING)
    if debug >= 2:
      _ConfigureLogging(level=logging.DEBUG)
      command_runner.RunNamedCommand('ver', ['-l'])
      config_items = []
      try:
        config_items.extend(boto.config.items('Boto'))
        config_items.extend(boto.config.items('GSUtil'))
      except ConfigParser.NoSectionError:
        pass
      # Redact sensitive config values before echoing them to stderr.
      for i in xrange(len(config_items)):
        config_item_key = config_items[i][0]
        if config_item_key in CONFIG_KEYS_TO_REDACT:
          config_items[i] = (config_item_key, 'REDACTED')
      sys.stderr.write('Command being run: %s\n' % ' '.join(sys.argv))
      sys.stderr.write('config_file_list: %s\n' % GetBotoConfigFileList())
      sys.stderr.write('config: %s\n' % str(config_items))
    elif quiet:
      _ConfigureLogging(level=logging.WARNING)
    else:
      _ConfigureLogging(level=logging.INFO)
      # oauth2client uses info logging in places that would better
      # correspond to gsutil's debug logging (e.g., when refreshing
      # access tokens).
      oauth2client.client.logger.setLevel(logging.WARNING)

    if not CERTIFICATE_VALIDATION_ENABLED:
      sys.stderr.write(HTTP_WARNING)

    if version:
      command_name = 'version'
    elif not args:
      command_name = 'help'
    else:
      command_name = args[0]

    # Unset http_proxy environment variable if it's set, because it confuses
    # boto. (Proxies should instead be configured via the boto config file.)
    if 'http_proxy' in os.environ:
      if debug > 1:
        sys.stderr.write(
            'Unsetting http_proxy environment variable within gsutil run.\n')
      del os.environ['http_proxy']

    if os.environ.get('_ARGCOMPLETE', '0') == '1':
      return _PerformTabCompletion(command_runner)

    return _RunNamedCommandAndHandleExceptions(
        command_runner, command_name, args=args[1:], headers=headers,
        debug_level=debug, parallel_operations=parallel_operations)
  finally:
    _Cleanup()
def test_filter_existing_components_versioned(self):
  """Tests FilterExistingComponents with versioned parallel components."""
  suri_builder = StorageUriBuilder(0, BucketStorageUri)
  bucket_uri = self.CreateVersionedBucket()
  tracker_file = self.CreateTempFile(file_name='foo', contents='asdf')
  tracker_file_lock = CreateLock()

  # Already uploaded, contents still match, component still used.
  fpath_uploaded_correctly = self.CreateTempFile(file_name='foo1',
                                                 contents='1')
  key_uploaded_correctly = self.CreateObject(object_name='foo1',
                                             contents='1',
                                             bucket_uri=bucket_uri)
  args_uploaded_correctly = PerformResumableUploadIfAppliesArgs(
      fpath_uploaded_correctly, 0, 1, fpath_uploaded_correctly,
      key_uploaded_correctly, key_uploaded_correctly.generation, {},
      tracker_file, tracker_file_lock)

  # Duplicate object name in tracker file, but uploaded correctly.
  # Re-creating the same object yields a second generation in the
  # versioned bucket.
  fpath_duplicate = fpath_uploaded_correctly
  key_duplicate = self.CreateObject(object_name='foo1', contents='1',
                                    bucket_uri=bucket_uri)
  args_duplicate = PerformResumableUploadIfAppliesArgs(
      fpath_duplicate, 0, 1, fpath_duplicate, key_duplicate,
      key_duplicate.generation, {}, tracker_file, tracker_file_lock)
  object_name_duplicate = ObjectFromTracker(
      fpath_duplicate, key_duplicate.generation).object_name
  uri_duplicate = MakeGsUri(bucket_uri.bucket_name, object_name_duplicate,
                            suri_builder)
  uri_duplicate.generation = args_duplicate.dst_uri.generation

  # Already uploaded, but contents no longer match.
  fpath_wrong_contents = self.CreateTempFile(file_name='foo4', contents='4')
  key_wrong_contents = self.CreateObject(object_name='foo4', contents='_',
                                         bucket_uri=bucket_uri)
  args_wrong_contents = PerformResumableUploadIfAppliesArgs(
      fpath_wrong_contents, 0, 1, fpath_wrong_contents, key_wrong_contents,
      key_wrong_contents.generation, {}, tracker_file, tracker_file_lock)

  dst_args = {
      fpath_uploaded_correctly: args_uploaded_correctly,
      fpath_wrong_contents: args_wrong_contents
  }
  existing_components = [
      ObjectFromTracker(fpath_uploaded_correctly,
                        key_uploaded_correctly.generation),
      ObjectFromTracker(fpath_duplicate, key_duplicate.generation),
      ObjectFromTracker(fpath_wrong_contents, key_wrong_contents.generation)
  ]
  (components_to_upload, uploaded_components,
   existing_objects_to_delete) = (FilterExistingComponents(
       dst_args, existing_components, bucket_uri.bucket_name, suri_builder))

  # Only the component whose contents changed needs re-uploading.
  self.assertEqual([args_wrong_contents], components_to_upload)
  self.assertEqual(str([args_uploaded_correctly.dst_uri]),
                   str(uploaded_components))
  # Both the stale generation and the duplicate generation should be deleted.
  expected_to_delete = [(args_wrong_contents.dst_uri.object_name,
                         args_wrong_contents.dst_uri.generation),
                        (uri_duplicate.object_name,
                         args_duplicate.dst_uri.generation)]
  for uri in existing_objects_to_delete:
    self.assertTrue((uri.object_name, uri.generation) in expected_to_delete)
  self.assertEqual(len(expected_to_delete), len(existing_objects_to_delete))
def test_filter_existing_components_non_versioned(self):
  """Tests FilterExistingComponents with a variety of component states."""
  bucket_uri = self.CreateBucket()
  tracker_file = self.CreateTempFile(file_name='foo', contents='asdf')
  tracker_file_lock = CreateLock()

  # Already uploaded, contents still match, component still used.
  fpath_uploaded_correctly = self.CreateTempFile(file_name='foo1',
                                                 contents='1')
  key_uploaded_correctly = self.CreateObject(object_name='foo1',
                                             contents='1',
                                             bucket_uri=bucket_uri)
  args_uploaded_correctly = PerformResumableUploadIfAppliesArgs(
      fpath_uploaded_correctly, 0, 1, fpath_uploaded_correctly,
      key_uploaded_correctly, '', {}, tracker_file, tracker_file_lock)

  # Not yet uploaded, but needed.
  fpath_not_uploaded = self.CreateTempFile(file_name='foo2', contents='2')
  key_not_uploaded = self.CreateObject(object_name='foo2', contents='2',
                                       bucket_uri=bucket_uri)
  args_not_uploaded = PerformResumableUploadIfAppliesArgs(
      fpath_not_uploaded, 0, 1, fpath_not_uploaded, key_not_uploaded, '',
      {}, tracker_file, tracker_file_lock)

  # Already uploaded, but contents no longer match. Even though the contents
  # differ, we don't delete this since the bucket is not versioned and it
  # will be overwritten anyway.
  fpath_wrong_contents = self.CreateTempFile(file_name='foo4', contents='4')
  key_wrong_contents = self.CreateObject(object_name='foo4', contents='_',
                                         bucket_uri=bucket_uri)
  args_wrong_contents = PerformResumableUploadIfAppliesArgs(
      fpath_wrong_contents, 0, 1, fpath_wrong_contents, key_wrong_contents,
      '', {}, tracker_file, tracker_file_lock)

  # Exists in tracker file, but component object no longer exists.
  fpath_remote_deleted = self.CreateTempFile(file_name='foo5', contents='5')
  args_remote_deleted = PerformResumableUploadIfAppliesArgs(
      fpath_remote_deleted, 0, 1, fpath_remote_deleted, '', '', {},
      tracker_file, tracker_file_lock)

  # Exists in tracker file and already uploaded, but no longer needed.
  fpath_no_longer_used = self.CreateTempFile(file_name='foo6', contents='6')
  # Assignment kept for its side effect: the object must exist in the bucket
  # so the filter can schedule it for deletion.
  key_no_longer_used = self.CreateObject(object_name='foo6', contents='6',
                                         bucket_uri=bucket_uri)

  dst_args = {
      fpath_uploaded_correctly: args_uploaded_correctly,
      fpath_not_uploaded: args_not_uploaded,
      fpath_wrong_contents: args_wrong_contents,
      fpath_remote_deleted: args_remote_deleted
  }
  existing_components = [
      ObjectFromTracker(fpath_uploaded_correctly, ''),
      ObjectFromTracker(fpath_wrong_contents, ''),
      ObjectFromTracker(fpath_remote_deleted, ''),
      ObjectFromTracker(fpath_no_longer_used, '')
  ]
  suri_builder = StorageUriBuilder(0, BucketStorageUri)
  (components_to_upload, uploaded_components,
   existing_objects_to_delete) = (FilterExistingComponents(
       dst_args, existing_components, bucket_uri.bucket_name, suri_builder))

  # Everything except the correctly-uploaded component must be (re)uploaded.
  for arg in [args_not_uploaded, args_wrong_contents, args_remote_deleted]:
    self.assertTrue(arg in components_to_upload)
  self.assertEqual(str([args_uploaded_correctly.dst_uri]),
                   str(uploaded_components))
  # Only the no-longer-needed component should be scheduled for deletion.
  self.assertEqual(
      str([MakeGsUri(bucket_uri.bucket_name, fpath_no_longer_used,
                     suri_builder)]),
      str(existing_objects_to_delete))