def __init__(self,
                 src_url,
                 src_obj_size,
                 gsutil_api,
                 progress_callback=None,
                 download_chunk_size=_DEFAULT_DOWNLOAD_CHUNK_SIZE):
        """Initializes the daisy chain wrapper.

    Args:
      src_url: Source CloudUrl to copy from.
      src_obj_size: Size of source object.
      gsutil_api: gsutil Cloud API to use for the copy.
      progress_callback: Optional callback function for progress notifications
          for the download thread. Receives calls with arguments
          (bytes_transferred, total_size).
      download_chunk_size: Integer number of bytes to download per
          GetObjectMedia request. This is the upper bound of bytes that may be
          unnecessarily downloaded if there is a break in the resumable upload.

    """
        # Current read position for the upload file pointer.
        self.position = 0
        # FIFO of downloaded data chunks awaiting consumption by the upload.
        self.buffer = deque()

        # Number of bytes currently held in self.buffer.
        self.bytes_buffered = 0
        # Maximum amount of bytes in memory at a time.
        self.max_buffer_size = 1024 * 1024  # 1 MiB

        # Size of each GetObjectMedia request made by the download thread.
        self._download_chunk_size = download_chunk_size

        # We save one buffer's worth of data as a special case for boto,
        # which seeks back one buffer and rereads to compute hashes. This is
        # unnecessary because we can just compare cloud hash digests at the end,
        # but it allows this to work without modifying boto.
        self.last_position = 0
        self.last_data = None

        # Protects buffer, position, bytes_buffered, last_position, and last_data.
        self.lock = CreateLock()

        # Protects download_exception.
        self.download_exception_lock = CreateLock()

        self.src_obj_size = src_obj_size
        self.src_url = src_url

        # This is safe to use the upload and download thread because the download
        # thread calls only GetObjectMedia, which creates a new HTTP connection
        # independent of gsutil_api. Thus, it will not share an HTTP connection
        # with the upload.
        self.gsutil_api = gsutil_api

        # If self.download_thread dies due to an exception, it is saved here so
        # that it can also be raised in the upload thread.
        self.download_exception = None
        self.download_thread = None
        self.progress_callback = progress_callback
        # Event used to signal the download thread to stop.
        self.stop_download = threading.Event()
        # Start producing data immediately; the thread fills self.buffer.
        self.StartDownloadThread(progress_callback=self.progress_callback)
Example #2
0
 def testWriteComponentToParallelUploadTrackerFile(self):
     """Tests that a mismatched encryption key fails the component write.

     Writes a tracker file using one encryption key hash, then attempts to
     append a new component with a different (None) key; the helper must
     raise CommandException rather than silently mixing keys.
     """
     tracker_file_lock = CreateLock()
     fpath = self.CreateTempFile(file_name='foo')
     random_prefix = '123'
     enc_key = '456'
     objects = [
         ObjectFromTracker('obj1', '42'),
         ObjectFromTracker('obj2', '314159')
     ]
     WriteParallelUploadTrackerFile(fpath,
                                    random_prefix,
                                    objects,
                                    encryption_key_sha256=enc_key)
     new_object = ObjectFromTracker('obj3', '43')
     try:
         WriteComponentToParallelUploadTrackerFile(
             fpath,
             tracker_file_lock,
             new_object,
             self.logger,
             encryption_key_sha256=None)
         self.fail(
             'Expected CommandException due to different encryption key')
     # 'except X as e' (PEP 3110) works on Python 2.6+ and 3.x, unlike the
     # Python-2-only 'except X, e' form used previously.
     except CommandException as e:
         self.assertIn('does not match encryption key', str(e))
Example #3
0
 def test_ParseEmptyParallelUploadTrackerFile(self):
     """Tests _ParseParallelUploadTrackerFile with an empty tracker file."""
     lock = CreateLock()
     empty_tracker = self.CreateTempFile(file_name='foo', contents='')
     prefix, parsed_objects = _ParseParallelUploadTrackerFile(
         empty_tracker, lock)
     # An empty file yields no component objects, but a (non-None) prefix
     # is still returned.
     self.assertEqual(parsed_objects, [])
     self.assertIsNotNone(prefix)
Example #4
0
def InitializeMultiprocessingVariables():
  """Performs necessary initialization of process-shared state.

  See gslib.command.InitializeMultiprocessingVariables for an explanation of
  why this is necessary. Must be called before any parallelized operation
  that needs token_exchange_lock.
  """
  global token_exchange_lock
  # Lock used for checking/exchanging refresh token so that a parallelized
  # operation doesn't attempt concurrent refreshes.
  token_exchange_lock = CreateLock()
Example #5
0
 def test_ParseParallelUploadTrackerFile(self):
     """Tests _ParseParallelUploadTrackerFile with a populated tracker file."""
     tracker_file_lock = CreateLock()
     random_prefix = '123'
     objects = ['obj1', '42', 'obj2', '314159']
     contents = '\n'.join([random_prefix] + objects)
     fpath = self.CreateTempFile(file_name='foo', contents=contents)
     # Tracker entries alternate (name, generation) lines; pair them up.
     # Floor division keeps the range bound an int under Python 3 as well
     # (plain / would yield a float and break range()).
     expected_objects = [
         ObjectFromTracker(objects[2 * i], objects[2 * i + 1])
         for i in range(len(objects) // 2)
     ]
     (actual_prefix, actual_objects) = _ParseParallelUploadTrackerFile(
         fpath, tracker_file_lock)
     self.assertEqual(random_prefix, actual_prefix)
     self.assertEqual(expected_objects, actual_objects)
Example #6
0
 def test_CreateParallelUploadTrackerFile(self):
     """Tests that _CreateParallelUploadTrackerFile replaces prior contents."""
     tracker_file = self.CreateTempFile(file_name='foo', contents='asdf')
     tracker_file_lock = CreateLock()
     random_prefix = '123'
     objects = ['obj1', '42', 'obj2', '314159']
     expected_contents = [random_prefix] + objects
     # Pair the flat (name, generation) list into tracker objects. Floor
     # division keeps the range bound an int under Python 3 as well.
     objects = [
         ObjectFromTracker(objects[2 * i], objects[2 * i + 1])
         for i in range(len(objects) // 2)
     ]
     _CreateParallelUploadTrackerFile(tracker_file, random_prefix, objects,
                                      tracker_file_lock)
     with open(tracker_file, 'rb') as f:
         lines = f.read().splitlines()
     self.assertEqual(expected_contents, lines)
Example #7
0
    def test_AppendComponentTrackerToParallelUploadTrackerFile(self):
        """Tests the _CreateParallelUploadTrackerFile function with append."""
        tracker_file = self.CreateTempFile(file_name='foo', contents='asdf')
        tracker_file_lock = CreateLock()
        random_prefix = '123'
        objects = ['obj1', '42', 'obj2', '314159']
        expected_contents = [random_prefix] + objects
        # Floor division keeps the range bound an int under Python 3 as well
        # (plain / would yield a float and break range()).
        objects = [
            ObjectFromTracker(objects[2 * i], objects[2 * i + 1])
            for i in range(len(objects) // 2)
        ]
        _CreateParallelUploadTrackerFile(tracker_file, random_prefix, objects,
                                         tracker_file_lock)

        # Appending must preserve the existing entries and add the new
        # component's (name, generation) pair at the end.
        new_object = ['obj2', '1234']
        expected_contents += new_object
        new_object = ObjectFromTracker(new_object[0], new_object[1])
        _AppendComponentTrackerToParallelUploadTrackerFile(
            tracker_file, new_object, tracker_file_lock)
        with open(tracker_file, 'rb') as f:
            lines = f.read().splitlines()
        self.assertEqual(expected_contents, lines)
Example #8
0
    def __init__(self, src_url, src_obj_size, gsutil_api):
        """Initializes the daisy chain wrapper.

    Args:
      src_url: Source CloudUrl to copy from.
      src_obj_size: Size of source object.
      gsutil_api: gsutil Cloud API to use for the copy.
    """
        # Current read position for the upload file pointer.
        self.position = 0
        # FIFO of downloaded data chunks awaiting consumption by the upload.
        self.buffer = deque()

        # Number of bytes currently held in self.buffer.
        self.bytes_buffered = 0
        # Maximum amount of bytes in memory at a time.
        self.max_buffer_size = 1024 * 1024  # 1 MiB

        # We save one buffer's worth of data as a special case for boto,
        # which seeks back one buffer and rereads to compute hashes. This is
        # unnecessary because we can just compare cloud hash digests at the end,
        # but it allows this to work without modifying boto.
        self.last_position = 0
        self.last_data = None

        # Protects buffer, position, bytes_buffered, last_position, and last_data.
        self.lock = CreateLock()

        self.src_obj_size = src_obj_size
        self.src_url = src_url

        # This is safe to use the upload and download thread because the download
        # thread calls only GetObjectMedia, which creates a new HTTP connection
        # independent of gsutil_api. Thus, it will not share an HTTP connection
        # with the upload.
        self.gsutil_api = gsutil_api

        self.download_thread = None
        # Event used to signal the download thread to stop.
        self.stop_download = threading.Event()
        # Start producing data immediately; the thread fills self.buffer.
        self.StartDownloadThread()
    def test_FilterExistingComponentsNonVersioned(self):
        """Tests upload with a variety of component states.

        Builds four local component files in different states (uploaded
        correctly, not yet uploaded, uploaded with stale contents, deleted
        remotely) plus one tracker-only leftover, then checks that
        FilterExistingComponents sorts them into upload / reuse / delete
        buckets for a non-versioned bucket.
        """
        mock_api = MockCloudApi()
        bucket_name = self.MakeTempName('bucket')
        tracker_file = self.CreateTempFile(file_name='foo', contents='asdf')
        tracker_file_lock = CreateLock()

        # dst_obj_metadata used for passing content-type.
        empty_object = apitools_messages.Object()

        # Already uploaded, contents still match, component still used.
        fpath_uploaded_correctly = self.CreateTempFile(file_name='foo1',
                                                       contents='1')
        fpath_uploaded_correctly_url = StorageUrlFromString(
            str(fpath_uploaded_correctly))
        object_uploaded_correctly_url = StorageUrlFromString(
            '%s://%s/%s' %
            (self.default_provider, bucket_name, fpath_uploaded_correctly))
        with open(fpath_uploaded_correctly) as f_in:
            fpath_uploaded_correctly_md5 = CalculateB64EncodedMd5FromContents(
                f_in)
        mock_api.MockCreateObjectWithMetadata(apitools_messages.Object(
            bucket=bucket_name,
            name=fpath_uploaded_correctly,
            md5Hash=fpath_uploaded_correctly_md5),
                                              contents='1')

        args_uploaded_correctly = PerformParallelUploadFileToObjectArgs(
            fpath_uploaded_correctly, 0, 1, fpath_uploaded_correctly_url,
            object_uploaded_correctly_url, '', empty_object, tracker_file,
            tracker_file_lock, None)

        # Not yet uploaded, but needed.
        fpath_not_uploaded = self.CreateTempFile(file_name='foo2',
                                                 contents='2')
        fpath_not_uploaded_url = StorageUrlFromString(str(fpath_not_uploaded))
        object_not_uploaded_url = StorageUrlFromString(
            '%s://%s/%s' %
            (self.default_provider, bucket_name, fpath_not_uploaded))
        args_not_uploaded = PerformParallelUploadFileToObjectArgs(
            fpath_not_uploaded, 0, 1, fpath_not_uploaded_url,
            object_not_uploaded_url, '', empty_object, tracker_file,
            tracker_file_lock, None)

        # Already uploaded, but contents no longer match. Even though the contents
        # differ, we don't delete this since the bucket is not versioned and it
        # will be overwritten anyway.
        fpath_wrong_contents = self.CreateTempFile(file_name='foo4',
                                                   contents='4')
        fpath_wrong_contents_url = StorageUrlFromString(
            str(fpath_wrong_contents))
        object_wrong_contents_url = StorageUrlFromString(
            '%s://%s/%s' %
            (self.default_provider, bucket_name, fpath_wrong_contents))
        # MD5 computed from a different file so it won't match the local one.
        with open(self.CreateTempFile(contents='_')) as f_in:
            fpath_wrong_contents_md5 = CalculateB64EncodedMd5FromContents(f_in)
        mock_api.MockCreateObjectWithMetadata(apitools_messages.Object(
            bucket=bucket_name,
            name=fpath_wrong_contents,
            md5Hash=fpath_wrong_contents_md5),
                                              contents='1')

        args_wrong_contents = PerformParallelUploadFileToObjectArgs(
            fpath_wrong_contents, 0, 1, fpath_wrong_contents_url,
            object_wrong_contents_url, '', empty_object, tracker_file,
            tracker_file_lock, None)

        # Exists in tracker file, but component object no longer exists.
        fpath_remote_deleted = self.CreateTempFile(file_name='foo5',
                                                   contents='5')
        fpath_remote_deleted_url = StorageUrlFromString(
            str(fpath_remote_deleted))
        args_remote_deleted = PerformParallelUploadFileToObjectArgs(
            fpath_remote_deleted, 0, 1, fpath_remote_deleted_url, '', '',
            empty_object, tracker_file, tracker_file_lock, None)

        # Exists in tracker file and already uploaded, but no longer needed.
        fpath_no_longer_used = self.CreateTempFile(file_name='foo6',
                                                   contents='6')
        with open(fpath_no_longer_used) as f_in:
            file_md5 = CalculateB64EncodedMd5FromContents(f_in)
        mock_api.MockCreateObjectWithMetadata(apitools_messages.Object(
            bucket=bucket_name, name='foo6', md5Hash=file_md5),
                                              contents='6')

        # Map of local file path -> upload args; note fpath_no_longer_used is
        # deliberately absent (it only appears in the tracker components).
        dst_args = {
            fpath_uploaded_correctly: args_uploaded_correctly,
            fpath_not_uploaded: args_not_uploaded,
            fpath_wrong_contents: args_wrong_contents,
            fpath_remote_deleted: args_remote_deleted
        }

        existing_components = [
            ObjectFromTracker(fpath_uploaded_correctly, ''),
            ObjectFromTracker(fpath_wrong_contents, ''),
            ObjectFromTracker(fpath_remote_deleted, ''),
            ObjectFromTracker(fpath_no_longer_used, '')
        ]

        bucket_url = StorageUrlFromString('%s://%s' %
                                          (self.default_provider, bucket_name))

        (components_to_upload, uploaded_components,
         existing_objects_to_delete) = (FilterExistingComponents(
             dst_args, existing_components, bucket_url, mock_api))

        # Everything except the correctly-uploaded component needs an upload.
        for arg in [
                args_not_uploaded, args_wrong_contents, args_remote_deleted
        ]:
            self.assertTrue(arg in components_to_upload)
        self.assertEqual(1, len(uploaded_components))
        self.assertEqual(args_uploaded_correctly.dst_url.url_string,
                         uploaded_components[0].url_string)
        # Only the tracker-only leftover should be scheduled for deletion.
        self.assertEqual(1, len(existing_objects_to_delete))
        no_longer_used_url = StorageUrlFromString(
            '%s://%s/%s' %
            (self.default_provider, bucket_name, fpath_no_longer_used))
        self.assertEqual(no_longer_used_url.url_string,
                         existing_objects_to_delete[0].url_string)
    def test_FilterExistingComponentsVersioned(self):
        """Tests upload with versioned parallel components.

        In a versioned bucket, stale and duplicate component generations are
        not overwritten implicitly, so FilterExistingComponents must schedule
        them for explicit deletion.
        """

        mock_api = MockCloudApi()
        bucket_name = self.MakeTempName('bucket')
        mock_api.MockCreateVersionedBucket(bucket_name)

        # dst_obj_metadata used for passing content-type.
        empty_object = apitools_messages.Object()

        tracker_file = self.CreateTempFile(file_name='foo', contents='asdf')
        tracker_file_lock = CreateLock()

        # Already uploaded, contents still match, component still used.
        fpath_uploaded_correctly = self.CreateTempFile(file_name='foo1',
                                                       contents='1')
        fpath_uploaded_correctly_url = StorageUrlFromString(
            str(fpath_uploaded_correctly))
        with open(fpath_uploaded_correctly) as f_in:
            fpath_uploaded_correctly_md5 = CalculateB64EncodedMd5FromContents(
                f_in)
        object_uploaded_correctly = mock_api.MockCreateObjectWithMetadata(
            apitools_messages.Object(bucket=bucket_name,
                                     name=fpath_uploaded_correctly,
                                     md5Hash=fpath_uploaded_correctly_md5),
            contents='1')
        object_uploaded_correctly_url = StorageUrlFromString(
            '%s://%s/%s#%s' %
            (self.default_provider, bucket_name, fpath_uploaded_correctly,
             object_uploaded_correctly.generation))
        args_uploaded_correctly = PerformParallelUploadFileToObjectArgs(
            fpath_uploaded_correctly, 0, 1, fpath_uploaded_correctly_url,
            object_uploaded_correctly_url,
            object_uploaded_correctly.generation, empty_object, tracker_file,
            tracker_file_lock, None)

        # Duplicate object name in tracker file, but uploaded correctly.
        # Creating it again in the versioned bucket yields a new generation.
        fpath_duplicate = fpath_uploaded_correctly
        fpath_duplicate_url = StorageUrlFromString(str(fpath_duplicate))
        duplicate_uploaded_correctly = mock_api.MockCreateObjectWithMetadata(
            apitools_messages.Object(bucket=bucket_name,
                                     name=fpath_duplicate,
                                     md5Hash=fpath_uploaded_correctly_md5),
            contents='1')
        duplicate_uploaded_correctly_url = StorageUrlFromString(
            '%s://%s/%s#%s' %
            (self.default_provider, bucket_name, fpath_uploaded_correctly,
             duplicate_uploaded_correctly.generation))
        args_duplicate = PerformParallelUploadFileToObjectArgs(
            fpath_duplicate, 0, 1, fpath_duplicate_url,
            duplicate_uploaded_correctly_url,
            duplicate_uploaded_correctly.generation, empty_object,
            tracker_file, tracker_file_lock, None)

        # Already uploaded, but contents no longer match.
        fpath_wrong_contents = self.CreateTempFile(file_name='foo4',
                                                   contents='4')
        fpath_wrong_contents_url = StorageUrlFromString(
            str(fpath_wrong_contents))
        # MD5 computed from a different file so it won't match the local one.
        with open(self.CreateTempFile(contents='_')) as f_in:
            fpath_wrong_contents_md5 = CalculateB64EncodedMd5FromContents(f_in)
        object_wrong_contents = mock_api.MockCreateObjectWithMetadata(
            apitools_messages.Object(bucket=bucket_name,
                                     name=fpath_wrong_contents,
                                     md5Hash=fpath_wrong_contents_md5),
            contents='_')
        wrong_contents_url = StorageUrlFromString(
            '%s://%s/%s#%s' %
            (self.default_provider, bucket_name, fpath_wrong_contents,
             object_wrong_contents.generation))
        args_wrong_contents = PerformParallelUploadFileToObjectArgs(
            fpath_wrong_contents, 0, 1, fpath_wrong_contents_url,
            wrong_contents_url, '', empty_object, tracker_file,
            tracker_file_lock, None)

        dst_args = {
            fpath_uploaded_correctly: args_uploaded_correctly,
            fpath_wrong_contents: args_wrong_contents
        }

        existing_components = [
            ObjectFromTracker(fpath_uploaded_correctly,
                              object_uploaded_correctly_url.generation),
            ObjectFromTracker(fpath_duplicate,
                              duplicate_uploaded_correctly_url.generation),
            ObjectFromTracker(fpath_wrong_contents,
                              wrong_contents_url.generation)
        ]

        bucket_url = StorageUrlFromString('%s://%s' %
                                          (self.default_provider, bucket_name))

        (components_to_upload, uploaded_components,
         existing_objects_to_delete) = (FilterExistingComponents(
             dst_args, existing_components, bucket_url, mock_api))

        self.assertEqual([args_wrong_contents], components_to_upload)
        self.assertEqual(args_uploaded_correctly.dst_url.url_string,
                         uploaded_components[0].url_string)
        # Both the stale-contents generation and the duplicate generation
        # must be deleted; order of the result is not guaranteed.
        expected_to_delete = [(args_wrong_contents.dst_url.object_name,
                               args_wrong_contents.dst_url.generation),
                              (args_duplicate.dst_url.object_name,
                               args_duplicate.dst_url.generation)]
        for uri in existing_objects_to_delete:
            self.assertTrue((uri.object_name,
                             uri.generation) in expected_to_delete)
        self.assertEqual(len(expected_to_delete),
                         len(existing_objects_to_delete))
Example #11
0
def main():
  """gsutil entry point.

  Initializes signal handling, multiprocessing/threading state, OAuth
  fallback credentials, boto configuration, and logging, then parses the
  top-level command-line options and dispatches to the named command.
  """
  InitializeSignalHandling()
  # Any modules used in initializing multiprocessing variables must be
  # imported after importing gslib.__main__.
  # pylint: disable=redefined-outer-name,g-import-not-at-top
  import gslib.boto_translation
  import gslib.command
  import gslib.util
  from gslib.util import BOTO_IS_SECURE
  from gslib.util import CERTIFICATE_VALIDATION_ENABLED
  # pylint: disable=unused-variable
  from gcs_oauth2_boto_plugin import oauth2_client
  from apitools.base.py import credentials_lib
  # pylint: enable=unused-variable
  from gslib.util import CheckMultiprocessingAvailableAndInit
  if CheckMultiprocessingAvailableAndInit().is_available:
    # These setup methods must be called, and, on Windows, they can only be
    # called from within an "if __name__ == '__main__':" block.
    gslib.command.InitializeMultiprocessingVariables()
    gslib.boto_translation.InitializeMultiprocessingVariables()
  else:
    gslib.command.InitializeThreadingVariables()

  # This needs to be done after gslib.util.InitializeMultiprocessingVariables(),
  # since otherwise we can't call gslib.util.CreateLock.
  try:
    # pylint: disable=unused-import,g-import-not-at-top
    import gcs_oauth2_boto_plugin
    gsutil_client_id, gsutil_client_secret = GetGsutilClientIdAndSecret()
    gcs_oauth2_boto_plugin.oauth2_helper.SetFallbackClientIdAndSecret(
        gsutil_client_id, gsutil_client_secret)
    gcs_oauth2_boto_plugin.oauth2_helper.SetLock(CreateLock())
    credentials_lib.SetCredentialsCacheFileLock(CreateLock())
  except ImportError:
    # OAuth plugin is optional; proceed without fallback credentials.
    pass

  global debug
  global test_exception_traces

  if not (2, 7) <= sys.version_info[:3] < (3,):
    raise CommandException('gsutil requires python 2.7.')

  # In gsutil 4.0 and beyond, we don't use the boto library for the JSON
  # API. However, we still store gsutil configuration data in the .boto
  # config file for compatibility with previous versions and user convenience.
  # Many users have a .boto configuration file from previous versions, and it
  # is useful to have all of the configuration for gsutil stored in one place.
  command_runner = CommandRunner()
  if not BOTO_IS_SECURE:
    raise CommandException('\n'.join(textwrap.wrap(
        'Your boto configuration has is_secure = False. Gsutil cannot be '
        'run this way, for security reasons.')))

  # Defaults for the global option flags parsed below.
  headers = {}
  parallel_operations = False
  quiet = False
  version = False
  debug = 0
  trace_token = None
  perf_trace_token = None
  test_exception_traces = False

  # If user enters no commands just print the usage info.
  if len(sys.argv) == 1:
    sys.argv.append('help')

  # Change the default of the 'https_validate_certificates' boto option to
  # True (it is currently False in boto).
  if not boto.config.has_option('Boto', 'https_validate_certificates'):
    if not boto.config.has_section('Boto'):
      boto.config.add_section('Boto')
    boto.config.setbool('Boto', 'https_validate_certificates', True)

  gslib.util.configured_certs_file = gslib.util.ConfigureCertsFile()
  for signal_num in GetCaughtSignals():
    RegisterSignalHandler(signal_num, _CleanupSignalHandler)
  GetCertsFile()

  try:
    try:
      opts, args = getopt.getopt(sys.argv[1:], 'dDvo:h:mq',
                                 ['debug', 'detailedDebug', 'version', 'option',
                                  'help', 'header', 'multithreaded', 'quiet',
                                  'testexceptiontraces', 'trace-token=',
                                  'perf-trace-token='])
    except getopt.GetoptError as e:
      _HandleCommandException(CommandException(e.msg))
    for o, a in opts:
      if o in ('-d', '--debug'):
        # Also causes boto to include httplib header output.
        debug = DEBUGLEVEL_DUMP_REQUESTS
      elif o in ('-D', '--detailedDebug'):
        # We use debug level 3 to ask gsutil code to output more detailed
        # debug output. This is a bit of a hack since it overloads the same
        # flag that was originally implemented for boto use. And we use -DD
        # to ask for really detailed debugging (i.e., including HTTP payload).
        if debug == DEBUGLEVEL_DUMP_REQUESTS:
          debug = DEBUGLEVEL_DUMP_REQUESTS_AND_PAYLOADS
        else:
          debug = DEBUGLEVEL_DUMP_REQUESTS
      elif o in ('-?', '--help'):
        _OutputUsageAndExit(command_runner)
      elif o in ('-h', '--header'):
        (hdr_name, _, hdr_val) = a.partition(':')
        if not hdr_name:
          _OutputUsageAndExit(command_runner)
        headers[hdr_name.lower()] = hdr_val
      elif o in ('-m', '--multithreaded'):
        parallel_operations = True
      elif o in ('-q', '--quiet'):
        quiet = True
      elif o in ('-v', '--version'):
        version = True
      elif o == '--perf-trace-token':
        perf_trace_token = a
      elif o == '--trace-token':
        trace_token = a
      elif o == '--testexceptiontraces':  # Hidden flag for integration tests.
        test_exception_traces = True
        # Avoid printing extra warnings to stderr regarding long retries by
        # setting the threshold very high.
        gslib.util.LONG_RETRY_WARN_SEC = 3600
      elif o in ('-o', '--option'):
        # -o section:name=value overrides a boto config entry for this run.
        (opt_section_name, _, opt_value) = a.partition('=')
        if not opt_section_name:
          _OutputUsageAndExit(command_runner)
        (opt_section, _, opt_name) = opt_section_name.partition(':')
        if not opt_section or not opt_name:
          _OutputUsageAndExit(command_runner)
        if not boto.config.has_section(opt_section):
          boto.config.add_section(opt_section)
        boto.config.set(opt_section, opt_name, opt_value)
    metrics.LogCommandParams(global_opts=opts)
    httplib2.debuglevel = debug
    if trace_token:
      sys.stderr.write(TRACE_WARNING)
    if debug >= DEBUGLEVEL_DUMP_REQUESTS:
      sys.stderr.write(DEBUG_WARNING)
      _ConfigureLogging(level=logging.DEBUG)
      command_runner.RunNamedCommand('ver', ['-l'])
      config_items = []
      try:
        config_items.extend(boto.config.items('Boto'))
        config_items.extend(boto.config.items('GSUtil'))
      except ConfigParser.NoSectionError:
        pass
      # Redact sensitive config values before dumping them to stderr.
      for i in xrange(len(config_items)):
        config_item_key = config_items[i][0]
        if config_item_key in CONFIG_KEYS_TO_REDACT:
          config_items[i] = (config_item_key, 'REDACTED')
      sys.stderr.write('Command being run: %s\n' % ' '.join(sys.argv))
      sys.stderr.write('config_file_list: %s\n' % GetBotoConfigFileList())
      sys.stderr.write('config: %s\n' % str(config_items))
    elif quiet:
      _ConfigureLogging(level=logging.WARNING)
    else:
      _ConfigureLogging(level=logging.INFO)
      # oauth2client uses info logging in places that would better
      # correspond to gsutil's debug logging (e.g., when refreshing
      # access tokens).
      oauth2client.client.logger.setLevel(logging.WARNING)

    if not CERTIFICATE_VALIDATION_ENABLED:
      sys.stderr.write(HTTP_WARNING)

    if version:
      command_name = 'version'
    elif not args:
      command_name = 'help'
    else:
      command_name = args[0]

    _CheckAndWarnForProxyDifferences()

    if os.environ.get('_ARGCOMPLETE', '0') == '1':
      return _PerformTabCompletion(command_runner)

    return _RunNamedCommandAndHandleExceptions(
        command_runner, command_name, args=args[1:], headers=headers,
        debug_level=debug, trace_token=trace_token,
        parallel_operations=parallel_operations,
        perf_trace_token=perf_trace_token)
  finally:
    _Cleanup()
Example #12
0
def main():
  """gsutil entry point: parse global flags, configure, and run a command.

  Initializes multiprocessing state and OAuth2 fallbacks, validates the
  Python version and boto security configuration, parses gsutil's global
  command-line options (-d/-D/-h/-m/-q/-v/-o and long forms), configures
  logging accordingly, and dispatches to the named command (or to tab
  completion when running under argcomplete), cleaning up on exit.

  Returns:
    The return value of _RunNamedCommandAndHandleExceptions or
    _PerformTabCompletion.

  Raises:
    gslib.exception.CommandException: If the Python version is unsupported
        or the boto configuration has is_secure = False.
  """
  # Any modules used in initializing multiprocessing variables must be
  # imported after importing gslib.__main__.
  # pylint: disable=redefined-outer-name,g-import-not-at-top
  import gslib.boto_translation
  import gslib.command
  import gslib.util
  from gslib.util import BOTO_IS_SECURE
  from gslib.util import CERTIFICATE_VALIDATION_ENABLED
  from gcs_oauth2_boto_plugin import oauth2_client
  from gslib.util import MultiprocessingIsAvailable
  if MultiprocessingIsAvailable()[0]:
    # These setup methods must be called, and, on Windows, they can only be
    # called from within an "if __name__ == '__main__':" block.
    gslib.util.InitializeMultiprocessingVariables()
    gslib.command.InitializeMultiprocessingVariables()
    gslib.boto_translation.InitializeMultiprocessingVariables()

  # This needs to be done after gslib.util.InitializeMultiprocessingVariables(),
  # since otherwise we can't call gslib.util.CreateLock.
  try:
    # pylint: disable=unused-import,g-import-not-at-top
    import gcs_oauth2_boto_plugin
    gcs_oauth2_boto_plugin.oauth2_helper.SetFallbackClientIdAndSecret(
        GSUTIL_CLIENT_ID, GSUTIL_CLIENT_NOTSOSECRET)
    gcs_oauth2_boto_plugin.oauth2_helper.SetLock(CreateLock())
  except ImportError:
    # Best-effort: the OAuth2 plugin is optional; continue without it.
    pass

  # Module-level globals; presumably read elsewhere (e.g. by exception
  # handlers) — set here from the parsed command line.
  global debug
  global test_exception_traces

  if not (2, 6) <= sys.version_info[:3] < (3,):
    raise gslib.exception.CommandException(
        'gsutil requires python 2.6 or 2.7.')

  # In gsutil 4.0 and beyond, we don't use the boto library for the JSON
  # API. However, we still store gsutil configuration data in the .boto
  # config file for compatibility with previous versions and user convenience.
  # Many users have a .boto configuration file from previous versions, and it
  # is useful to have all of the configuration for gsutil stored in one place.
  command_runner = CommandRunner()
  if not BOTO_IS_SECURE:
    raise CommandException('\n'.join(textwrap.wrap(
        'Your boto configuration has is_secure = False. Gsutil cannot be '
        'run this way, for security reasons.')))

  headers = {}
  parallel_operations = False
  quiet = False
  version = False
  debug = 0
  test_exception_traces = False

  # If user enters no commands just print the usage info.
  if len(sys.argv) == 1:
    sys.argv.append('help')

  # Change the default of the 'https_validate_certificates' boto option to
  # True (it is currently False in boto).
  if not boto.config.has_option('Boto', 'https_validate_certificates'):
    if not boto.config.has_section('Boto'):
      boto.config.add_section('Boto')
    boto.config.setbool('Boto', 'https_validate_certificates', True)

  GetCertsFile()

  try:
    try:
      opts, args = getopt.getopt(sys.argv[1:], 'dDvo:h:mq',
                                 ['debug', 'detailedDebug', 'version', 'option',
                                  'help', 'header', 'multithreaded', 'quiet',
                                  'testexceptiontraces'])
    except getopt.GetoptError as e:
      # NOTE(review): the code below assumes _HandleCommandException does not
      # return (otherwise 'opts' would be unbound here) — confirm.
      _HandleCommandException(gslib.exception.CommandException(e.msg))
    for o, a in opts:
      if o in ('-d', '--debug'):
        # Passing debug=2 causes boto to include httplib header output.
        # NOTE(review): the comment above mentions debug=2 but 3 is assigned
        # here — confirm this is intentional.
        debug = 3
      elif o in ('-D', '--detailedDebug'):
        # We use debug level 3 to ask gsutil code to output more detailed
        # debug output. This is a bit of a hack since it overloads the same
        # flag that was originally implemented for boto use. And we use -DD
        # to ask for really detailed debugging (i.e., including HTTP payload).
        if debug == 3:
          debug = 4
        else:
          debug = 3
      elif o in ('-?', '--help'):
        _OutputUsageAndExit(command_runner)
      elif o in ('-h', '--header'):
        # Headers are given as "name:value"; the name is case-insensitive.
        (hdr_name, _, hdr_val) = a.partition(':')
        if not hdr_name:
          _OutputUsageAndExit(command_runner)
        headers[hdr_name.lower()] = hdr_val
      elif o in ('-m', '--multithreaded'):
        parallel_operations = True
      elif o in ('-q', '--quiet'):
        quiet = True
      elif o in ('-v', '--version'):
        version = True
      elif o == '--testexceptiontraces':  # Hidden flag for integration tests.
        test_exception_traces = True
      elif o in ('-o', '--option'):
        # -o takes "section:name=value" and injects it into the boto config.
        (opt_section_name, _, opt_value) = a.partition('=')
        if not opt_section_name:
          _OutputUsageAndExit(command_runner)
        (opt_section, _, opt_name) = opt_section_name.partition(':')
        if not opt_section or not opt_name:
          _OutputUsageAndExit(command_runner)
        if not boto.config.has_section(opt_section):
          boto.config.add_section(opt_section)
        boto.config.set(opt_section, opt_name, opt_value)
    httplib2.debuglevel = debug
    if debug > 1:
      sys.stderr.write(DEBUG_WARNING)
    if debug >= 2:
      # In detailed-debug mode, dump version, config file list, and the boto
      # config (with sensitive keys redacted) to stderr for diagnostics.
      _ConfigureLogging(level=logging.DEBUG)
      command_runner.RunNamedCommand('ver', ['-l'])
      config_items = []
      try:
        config_items.extend(boto.config.items('Boto'))
        config_items.extend(boto.config.items('GSUtil'))
      except ConfigParser.NoSectionError:
        pass
      # Redact values of known-sensitive config keys before printing.
      for i in xrange(len(config_items)):
        config_item_key = config_items[i][0]
        if config_item_key in CONFIG_KEYS_TO_REDACT:
          config_items[i] = (config_item_key, 'REDACTED')
      sys.stderr.write('Command being run: %s\n' % ' '.join(sys.argv))
      sys.stderr.write('config_file_list: %s\n' % GetBotoConfigFileList())
      sys.stderr.write('config: %s\n' % str(config_items))
    elif quiet:
      _ConfigureLogging(level=logging.WARNING)
    else:
      _ConfigureLogging(level=logging.INFO)
      # oauth2client uses info logging in places that would better
      # correspond to gsutil's debug logging (e.g., when refreshing
      # access tokens).
      oauth2client.client.logger.setLevel(logging.WARNING)

    if not CERTIFICATE_VALIDATION_ENABLED:
      sys.stderr.write(HTTP_WARNING)

    if version:
      command_name = 'version'
    elif not args:
      command_name = 'help'
    else:
      command_name = args[0]

    # Unset http_proxy environment variable if it's set, because it confuses
    # boto. (Proxies should instead be configured via the boto config file.)
    if 'http_proxy' in os.environ:
      if debug > 1:
        sys.stderr.write(
            'Unsetting http_proxy environment variable within gsutil run.\n')
      del os.environ['http_proxy']

    if os.environ.get('_ARGCOMPLETE', '0') == '1':
      return _PerformTabCompletion(command_runner)

    return _RunNamedCommandAndHandleExceptions(
        command_runner, command_name, args=args[1:], headers=headers,
        debug_level=debug, parallel_operations=parallel_operations)
  finally:
    # Always clean up (temp state, multiprocessing resources) regardless of
    # how the command exits.
    _Cleanup()
Example #13
0
    def test_filter_existing_components_versioned(self):
        """Tests FilterExistingComponents against a versioned bucket.

        Verifies that a correctly uploaded component is reused, that a
        component whose local contents changed is re-uploaded, and that both
        the stale generation and a duplicate tracker entry are scheduled
        for deletion.
        """
        builder = StorageUriBuilder(0, BucketStorageUri)
        dst_bucket = self.CreateVersionedBucket()
        tracker_path = self.CreateTempFile(file_name='foo', contents='asdf')
        tracker_lock = CreateLock()

        # Component already uploaded whose contents still match: reused.
        good_path = self.CreateTempFile(file_name='foo1', contents='1')
        good_key = self.CreateObject(object_name='foo1',
                                     contents='1',
                                     bucket_uri=dst_bucket)
        good_args = PerformResumableUploadIfAppliesArgs(
            good_path, 0, 1, good_path, good_key, good_key.generation, {},
            tracker_path, tracker_lock)

        # Same object name uploaded a second time: duplicate tracker entry,
        # although the upload itself succeeded.
        dup_path = good_path
        dup_key = self.CreateObject(object_name='foo1',
                                    contents='1',
                                    bucket_uri=dst_bucket)
        dup_args = PerformResumableUploadIfAppliesArgs(
            dup_path, 0, 1, dup_path, dup_key, dup_key.generation, {},
            tracker_path, tracker_lock)
        dup_object_name = ObjectFromTracker(
            dup_path, dup_key.generation).object_name
        dup_uri = MakeGsUri(dst_bucket.bucket_name, dup_object_name, builder)
        dup_uri.generation = dup_args.dst_uri.generation

        # Component already uploaded, but local contents no longer match.
        stale_path = self.CreateTempFile(file_name='foo4', contents='4')
        stale_key = self.CreateObject(object_name='foo4',
                                      contents='_',
                                      bucket_uri=dst_bucket)
        stale_args = PerformResumableUploadIfAppliesArgs(
            stale_path, 0, 1, stale_path, stale_key, stale_key.generation, {},
            tracker_path, tracker_lock)

        dst_args = {good_path: good_args, stale_path: stale_args}

        existing_components = [
            ObjectFromTracker(good_path, good_key.generation),
            ObjectFromTracker(dup_path, dup_key.generation),
            ObjectFromTracker(stale_path, stale_key.generation)
        ]

        (components_to_upload, uploaded_components,
         existing_objects_to_delete) = (FilterExistingComponents(
             dst_args, existing_components, dst_bucket.bucket_name, builder))

        # Only the changed component is re-uploaded; the good one is reused.
        self.assertEqual([stale_args], components_to_upload)
        self.assertEqual(str([good_args.dst_uri]), str(uploaded_components))

        # Both the stale generation and the duplicate must be deleted —
        # nothing more, nothing less.
        expected_to_delete = [(stale_args.dst_uri.object_name,
                               stale_args.dst_uri.generation),
                              (dup_uri.object_name,
                               dup_args.dst_uri.generation)]
        deleted_pairs = [(uri.object_name, uri.generation)
                         for uri in existing_objects_to_delete]
        for pair in deleted_pairs:
            self.assertTrue(pair in expected_to_delete)
        self.assertEqual(len(expected_to_delete), len(deleted_pairs))
Example #14
0
    def test_filter_existing_components_non_versioned(self):
        """Tests FilterExistingComponents against an unversioned bucket.

        Verifies which components get re-uploaded (not-yet-uploaded, changed
        contents, remotely deleted), that the matching component is reused,
        and that only the no-longer-needed component is deleted.
        """
        dst_bucket = self.CreateBucket()
        tracker_path = self.CreateTempFile(file_name='foo', contents='asdf')
        tracker_lock = CreateLock()

        # Already uploaded and contents still match: component is reused.
        good_path = self.CreateTempFile(file_name='foo1', contents='1')
        good_key = self.CreateObject(object_name='foo1',
                                     contents='1',
                                     bucket_uri=dst_bucket)
        good_args = PerformResumableUploadIfAppliesArgs(
            good_path, 0, 1, good_path, good_key, '', {}, tracker_path,
            tracker_lock)

        # Not yet uploaded, but still needed: must be uploaded.
        pending_path = self.CreateTempFile(file_name='foo2', contents='2')
        pending_key = self.CreateObject(object_name='foo2',
                                        contents='2',
                                        bucket_uri=dst_bucket)
        pending_args = PerformResumableUploadIfAppliesArgs(
            pending_path, 0, 1, pending_path, pending_key, '', {},
            tracker_path, tracker_lock)

        # Uploaded but contents changed. With no versioning the object is
        # simply overwritten, so nothing extra needs deleting.
        stale_path = self.CreateTempFile(file_name='foo4', contents='4')
        stale_key = self.CreateObject(object_name='foo4',
                                      contents='_',
                                      bucket_uri=dst_bucket)
        stale_args = PerformResumableUploadIfAppliesArgs(
            stale_path, 0, 1, stale_path, stale_key, '', {}, tracker_path,
            tracker_lock)

        # Listed in the tracker file, but the component object is gone from
        # the bucket, so it must be uploaded again.
        missing_path = self.CreateTempFile(file_name='foo5', contents='5')
        missing_args = PerformResumableUploadIfAppliesArgs(
            missing_path, 0, 1, missing_path, '', '', {}, tracker_path,
            tracker_lock)

        # Uploaded and tracked, but no longer part of the destination:
        # the stored component should be deleted.
        unused_path = self.CreateTempFile(file_name='foo6', contents='6')
        unused_key = self.CreateObject(object_name='foo6',
                                       contents='6',
                                       bucket_uri=dst_bucket)

        dst_args = {
            good_path: good_args,
            pending_path: pending_args,
            stale_path: stale_args,
            missing_path: missing_args
        }

        existing_components = [
            ObjectFromTracker(good_path, ''),
            ObjectFromTracker(stale_path, ''),
            ObjectFromTracker(missing_path, ''),
            ObjectFromTracker(unused_path, '')
        ]

        builder = StorageUriBuilder(0, BucketStorageUri)

        (components_to_upload, uploaded_components,
         existing_objects_to_delete) = (FilterExistingComponents(
             dst_args, existing_components, dst_bucket.bucket_name, builder))

        for needed_args in (pending_args, stale_args, missing_args):
            self.assertTrue(needed_args in components_to_upload)
        self.assertEqual(str([good_args.dst_uri]), str(uploaded_components))
        self.assertEqual(
            str([MakeGsUri(dst_bucket.bucket_name, unused_path, builder)]),
            str(existing_objects_to_delete))