def _DeployFunction(self, *args, **kwargs):
        name = kwargs.pop('name', self._GenerateFunctionName())
        contents = kwargs.pop('function_content', FUNCTION_JS_FILE)
        command_args = []
        source = kwargs.pop('source', '')
        self._WriteFunctionSource(name,
                                  source or self.function_path,
                                  content=contents)
        if source:
            source = '--source {}'.format(source)

        for no_value_flag in args:
            command_args.append(no_value_flag.replace('_', '-'))

        for flag, flag_value in six.iteritems(kwargs):
            command_args.append('--{}'.format(flag.replace('_', '-')))
            command_args.append(flag_value)
        command_args.append('--entry-point {}'.format(name))
        try:
            command = 'functions deploy {name} {source} {args}'.format(
                name=name, source=source, args=' '.join(command_args))
            self.Run(command)
            yield name
        finally:
            delete_retryer = retry.Retryer(max_retrials=3,
                                           exponential_sleep_multiplier=2)
            delete_retryer.RetryOnException(
                self.Run, ['functions delete {} --quiet'.format(name)])
 def Execute(self, callback=None):
     storage_client = storage_api.StorageClient()
     retry.Retryer(max_retrials=3).RetryOnException(
         storage_client.CopyFileToGCS,
         args=(self.source_local_path, self.dest_obj_ref))
     if callback:
         callback()
  def testLogEntries(self):
    def FindLogEntry(parent, log_id, flag):
      """Try to read one of the entries we just wrote."""
      log_filter = 'logName={0}/logs/{1}'.format(parent, log_id)
      entry = self.RunLogging(
          'read {0} --freshness=1h --limit=1 {1}'.format(
              log_filter, flag))
      return any(entry)

    for parent, flag in self._parents_and_flags:
      log_id = next(self._name_generator)

      self.RunLogging('write {0} hello {1}'.format(log_id, flag))
      self.RunLogging(
          'write {0} urgent_hello --severity=ERROR {1}'.format(log_id, flag))
      self.RunLogging('write %s \'{"a": "hello"}\' --payload-type=json %s'
                      % (log_id, flag))

      try:
        # Total retry time of 120 sec.
        retries_ms = (15000, 30000, 30000, 45000)
        # Retry if the log ID was not found; it should become visible shortly
        # after the "write" command.
        retry.Retryer().RetryOnResult(
            FindLogEntry, args=[parent, log_id, flag], should_retry_if=False,
            sleep_ms=retries_ms)
      except retry.MaxRetrialsException:
        raise Exception('Retry limit exceeded. Note that this test relies on '
                        'Bigtable replication and may occasionally be flaky')
def _SetDefaultVersion(new_version, api_client):
    """Sets the given version as the default.

  Args:
    new_version: Version, The version to promote.
    api_client: appengine_api_client.AppengineApiClient to use to make requests.
  """
    metrics.CustomTimedEvent(metric_names.SET_DEFAULT_VERSION_API_START)

    # TODO(b/31824825): It sometimes takes a while for a new service to show up.
    # Retry it if we get a service not found error.
    def ShouldRetry(exc_type, unused_exc_value, unused_traceback,
                    unused_state):
        return issubclass(exc_type, core_api_exceptions.HttpException)

    try:
        retryer = retry.Retryer(max_retrials=3, exponential_sleep_multiplier=2)
        retryer.RetryOnException(api_client.SetDefaultVersion,
                                 [new_version.service, new_version.id],
                                 should_retry_if=ShouldRetry,
                                 sleep_ms=1000)
    except retry.MaxRetrialsException as e:
        (unused_result, exc_info) = e.last_result
        if exc_info:
            # Re-raise the (type, value, traceback) 3-tuple of the last
            # exception the function raised, preserving the original traceback.
            six.reraise(exc_info[0], exc_info[1], exc_info[2])
        else:
            # This shouldn't happen, but if we don't have the exception info for some
            # reason, just convert the MaxRetrialsException.
            raise exceptions.InternalError()
    metrics.CustomTimedEvent(metric_names.SET_DEFAULT_VERSION_API)
 def Execute(self, callback=None):
     """Complete one ObjectDeleteTask (safe to run in parallel)."""
     storage_client = storage_api.StorageClient()
     retry.Retryer(max_retrials=3).RetryOnException(
         storage_client.DeleteObject, args=(self.obj_ref, ))
     if callback:
         callback()
 def Enable(self, feature):
     project = properties.VALUES.core.project.GetOrFail()
     enable_api.EnableServiceIfDisabled(project, self.feature.api)
     parent = util.LocationResourceName(project)
     try:
         # Retry if we still get "API not activated"; it can take a few minutes
         # for Chemist to catch up. See b/28800908.
         # TODO(b/177098463): Add a spinner here?
         retryer = retry.Retryer(max_retrials=4,
                                 exponential_sleep_multiplier=1.75)
         op = retryer.RetryOnException(
             self.hubclient.CreateFeature,
             args=(parent, self.feature_name, feature),
             should_retry_if=self._FeatureAPINotEnabled,
             sleep_ms=1000)
     except retry.MaxRetrialsException:
         raise exceptions.Error(
             'Retry limit exceeded waiting for {} to enable'.format(
                 self.feature.api))
     except apitools_exceptions.HttpConflictError as e:
         # If the error is not due to the object already existing, re-raise.
         error = core_api_exceptions.HttpErrorPayload(e)
         if error.status_description != 'ALREADY_EXISTS':
             raise
         # TODO(b/177098463): Decide if this should be a hard error if a spec was
         # set, but not applied, because the Feature already existed.
         log.status.Print(
             '{} Feature for project [{}] is already enabled'.format(
                 self.feature.display_name, project))
         return
     msg = 'Waiting for Feature {} to be created'.format(
         self.feature.display_name)
     return self.WaitForHubOp(self.hubclient.feature_waiter,
                              op=op,
                              message=msg)
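
`self._FeatureAPINotEnabled` is not shown above. Because it is passed to `RetryOnException`, it receives the usual `(exc_type, exc_value, traceback, state)` arguments and presumably returns True only for the "API not activated" HTTP errors mentioned in the comment; the concrete status check below is an assumption, not the original predicate.

  # Sketch of a method on the same class as Enable (assumed, for illustration).
  def _FeatureAPINotEnabled(self, exc_type, exc_value, unused_traceback,
                            unused_state):
    """Assumed retry predicate: only retry "API not activated" errors."""
    if not issubclass(exc_type, apitools_exceptions.HttpError):
      return False
    error = core_api_exceptions.HttpErrorPayload(exc_value)
    # The concrete status/message test is a guess; the real check may differ.
    return (error.status_code == 403 and
            self.feature.api in (error.status_message or ''))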
Example #7
    def WaitForBreakpoint(self, breakpoint_id, timeout=None, retry_ms=500):
        """Waits for a breakpoint to be completed.

    Args:
      breakpoint_id: A breakpoint ID.
      timeout: The number of seconds to wait for completion.
      retry_ms: Milliseconds to wait between retries.
    Returns:
      The Breakpoint message, or None if the breakpoint did not complete before
      the timeout.
    """
        retryer = retry.Retryer(
            max_wait_ms=1000 * timeout if timeout is not None else None,
            wait_ceiling_ms=1000)
        request = (self._debug_messages.
                   ClouddebuggerDebuggerDebuggeesBreakpointsGetRequest(
                       breakpointId=breakpoint_id,
                       debuggeeId=self.target_id,
                       clientVersion=self.CLIENT_VERSION))
        try:
            result = retryer.RetryOnResult(
                self._debug_client.debugger_debuggees_breakpoints.Get,
                [request],
                should_retry_if=lambda r, _: not r.breakpoint.isFinalState,
                sleep_ms=retry_ms)
        except retry.RetryException:
            # Timeout before the breakpoint was finalized.
            return None
        return self.AddTargetInfo(result.breakpoint)
    def WaitForOperationResult(self, operation_ref):
        """Waits for an operation to complete.

    Args:
      operation_ref: A reference to the operation on which to wait.

    Raises:
      TimeoutError: if the operation does not complete in time.

    Returns:
      The Operation object, if successful. Raises an exception on failure.
    """

        # Wait for no more than 30 minutes while retrying the Operation retrieval
        try:
            retryer = retry.Retryer(exponential_sleep_multiplier=1.1,
                                    wait_ceiling_ms=10000,
                                    max_wait_ms=30 * 60 * 1000)
            result = retryer.RetryOnResult(self.CheckResult, [operation_ref],
                                           should_retry_if=None,
                                           sleep_ms=1500)
        # With no max_retrials set, a timeout surfaces as WaitException, so
        # catch the shared RetryException base class.
        except retry.RetryException:
            raise exceptions.TimeoutError(
                'Timed out while waiting for '
                'operation {}. Note that the operation '
                'is still pending.'.format(operation_ref.Name()))

        return result
Example #9
 def wrapper(*args, **kwargs):
     retryer = retry.Retryer(max_retrials=3,
                             exponential_sleep_multiplier=2)
     return retryer.RetryOnException(func,
                                     args,
                                     kwargs,
                                     should_retry_if=retryIf)
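
The `wrapper` above is only a fragment; it presumably sits inside a decorator factory along the lines of the sketch below. The factory name and the `retryIf` parameter are assumptions used for illustration.

import functools

from googlecloudsdk.core.util import retry


def RetryOnExceptionDecorator(retryIf=None):
  """Hypothetical factory that would produce the wrapper shown above."""

  def Decorator(func):

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
      retryer = retry.Retryer(max_retrials=3,
                              exponential_sleep_multiplier=2)
      return retryer.RetryOnException(func,
                                      args,
                                      kwargs,
                                      should_retry_if=retryIf)

    return wrapper

  return Decorator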
def WaitForOperation(operation_service, operation, registry=None):
    """Wait until the operation is complete or times out.

  Args:
    operation_service: The apitools service type for operations
    operation: The operation resource to wait on
    registry: A resource registry to use for operation get requests.
  Returns:
    The operation resource when it has completed
  Raises:
    OperationTimeoutError: when the operation polling times out
    OperationError: when the operation completed with an error
  """
    if operation.done:
        return operation
    if not registry:
        registry = resources.REGISTRY
    request = registry.Parse(operation.name.split('/')[-1],
                             collection='ml.projects.operations').Request()
    try:
        operation = retry.Retryer(max_wait_ms=60 * 60 * 1000).RetryOnResult(
            operation_service.Get,
            args=(request, ),
            should_retry_if=lambda op, _: not op.done,
            sleep_ms=5000)
        if operation.error:
            raise OperationError(
                requests.ExtractErrorMessage(
                    encoding.MessageToPyValue(operation.error)))
        return operation
    except retry.WaitException:
        raise OperationTimeoutError(
            'Operation [{0}] timed out. This operation may still be underway.'.
            format(operation.name))
Example #11
def _WaitForOperation(client, get_request, message):
    """Wait for an operation to complete.

  No operation completes instantly, so poll for completion with this logic:
  * wait 1s (the jitter is also 1s)
  * query the service
  * if the operation is not finished, loop back to the first step
  * the wait limit is 620s; reaching it means something is wrong and we
    raise an exception

  Args:
    client:  The client used to make requests.
    get_request: A GetOperationRequest message.
    message: str, The string to print while polling.

  Returns:
    True if the operation succeeded without error.

  Raises:
    FunctionsError: If the operation takes more than 620s.
  """

    with console_io.ProgressTracker(message, autotick=False) as pt:
        # With a multiplier of 1, this is effectively a linear retryer.
        retryer = retry.Retryer(exponential_sleep_multiplier=1,
                                max_wait_ms=MAX_WAIT_MS,
                                wait_ceiling_ms=WAIT_CEILING_MS)
        try:
            retryer.RetryOnResult(_GetOperationStatus, [client, get_request],
                                  {'progress_tracker': pt},
                                  should_retry_if=None,
                                  sleep_ms=SLEEP_MS)
        except retry.WaitException:
            raise exceptions.FunctionsError(
                'Operation {0} is taking too long'.format(get_request.name))
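
The constants referenced above are defined elsewhere in the module. Values consistent with the docstring (1s sleep and jitter, 620s limit) might look like the following; these are assumptions, not the original definitions.

SLEEP_MS = 1000               # poll roughly once per second
MAX_WAIT_MS = 620 * 1000      # give up after ~620s
WAIT_CEILING_MS = 10 * 1000   # cap on any single wait (a guess)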
Example #12
def _UploadFiles(files_to_upload, bucket_ref, storage_client):
    for sha1_hash, path in sorted(files_to_upload.iteritems()):
        log.debug('Uploading [{f}] to [{gcs}]'.format(f=path, gcs=sha1_hash))
        retryer = retry.Retryer(max_retrials=3)
        retryer.RetryOnException(cloud_storage.CopyFileToGCS,
                                 args=(bucket_ref, path, sha1_hash,
                                       storage_client))
def WaitOperation(name):
    """Wait till the operation is done.

  Args:
    name: The name of operation.

  Raises:
    exceptions.OperationErrorException: when the call to get the operation fails.
    apitools_exceptions.HttpError: Another miscellaneous error with the peering
        service.

  Returns:
    The result of the peering operation
  """
    def _CheckOp(name, result):  # pylint: disable=missing-docstring
        op = GetOperation(name)
        if op.done:
            result.append(op)
        return not op.done

    # Wait for no more than 30 minutes while retrying the Operation retrieval
    result = []
    try:
        retry.Retryer(exponential_sleep_multiplier=1.1,
                      wait_ceiling_ms=10000,
                      max_wait_ms=30 * 60 * 1000).RetryOnResult(
                          _CheckOp, [name, result],
                          should_retry_if=True,
                          sleep_ms=2000)
    # With no max_retrials set, a timeout surfaces as WaitException, so catch
    # the shared RetryException base class.
    except retry.RetryException:
        raise exceptions.TimeoutError('Timed out while waiting for '
                                      'operation {0}. Note that the operation '
                                      'is still pending.'.format(name))
    return result[0] if result else None
Example #14
  def WaitForBreakpoint(self, breakpoint_id, timeout=None, retry_ms=500,
                        should_retry_if=None):
    """Waits for a breakpoint to be completed.

    Args:
      breakpoint_id: A breakpoint ID.
      timeout: The number of seconds to wait for completion.
      retry_ms: Milliseconds to wait between retries.
      should_retry_if: A function that accepts a Breakpoint message and returns
        True if the breakpoint wait is not finished. If not specified, defaults
        to a function which just checks the isFinalState flag.
    Returns:
      The Breakpoint message, or None if the breakpoint did not complete before
      the timeout.
    """
    if not should_retry_if:
      should_retry_if = lambda r, _: not r.breakpoint.isFinalState
    retryer = retry.Retryer(
        max_wait_ms=1000*timeout if timeout is not None else None,
        wait_ceiling_ms=1000)
    request = (self._debug_messages.
               ClouddebuggerDebuggerDebuggeesBreakpointsGetRequest(
                   breakpointId=breakpoint_id, debuggeeId=self.target_id,
                   clientVersion=self.CLIENT_VERSION))
    try:
      result = retryer.RetryOnResult(self._CallGet, [request],
                                     should_retry_if=should_retry_if,
                                     sleep_ms=retry_ms)
    except retry.RetryException:
      # Timeout before the breakpoint was finalized.
      return None
    if not result.breakpoint.isFinalState:
      return None
    return self.AddTargetInfo(result.breakpoint)
Example #15
def _WhitelistClientIP(instance_ref, sql_client, sql_messages, resources):
    """Add CLIENT_IP to the authorized networks list.

  Makes an API call to add CLIENT_IP to the authorized networks list.
  The server knows to interpret the string CLIENT_IP as the address with which
  the client reaches the server. This IP will be whitelisted for 1 minute.

  Args:
    instance_ref: resources.Resource, The instance we're connecting to.
    sql_client: apitools.BaseApiClient, A working client for the sql version
        to be used.
    sql_messages: module, The module that defines the messages for the sql
        version to be used.
    resources: resources.Registry, The registry that can create resource refs
        for the sql version to be used.

  Returns:
    string, The name of the authorized network rule. Callers can use this name
    to find out the IP the client reached the server with.
  """
    datetime_now = datetime.datetime.now(
        protorpc.util.TimeZoneOffset(datetime.timedelta(0)))

    acl_name = 'sql connect at time {0}'.format(datetime_now)
    user_acl = sql_messages.AclEntry(name=acl_name,
                                     expirationTime=datetime_now +
                                     datetime.timedelta(minutes=1),
                                     value='CLIENT_IP')

    try:
        original = sql_client.instances.Get(instance_ref.Request())
    except apitools_base.HttpError as error:
        raise exceptions.HttpException(util.GetErrorMessage(error))

    original.settings.ipConfiguration.authorizedNetworks.append(user_acl)
    patch_request = sql_messages.SqlInstancesPatchRequest(
        databaseInstance=original,
        project=instance_ref.project,
        instance=instance_ref.instance)
    result = sql_client.instances.Patch(patch_request)

    operation_ref = resources.Create('sql.operations',
                                     operation=result.name,
                                     project=instance_ref.project,
                                     instance=instance_ref.instance)
    message = 'Whitelisting your IP for incoming connection for 1 minute'

    # Due to eventual consistency, the server might not yet know about the
    # operation we just issued above and may throw an exception.
    # This retry is not polling of the operation itself, but a retry until
    # the server knows the operation actually exists.
    try:
        retryer = retry.Retryer(max_retrials=2, exponential_sleep_multiplier=2)
        retryer.RetryOnException(util.WaitForOperationV1Beta4,
                                 [sql_client, operation_ref, message],
                                 sleep_ms=500)
    except retry.RetryException:
        raise exceptions.ToolException('Could not whitelist client IP.')

    return acl_name
Example #16
    def Lock(self):
        """Opens and locks the file. A no-op if this FileLock is already locked.

    The lock file is created if it does not already exist.

    Raises:
      FileLockLockingError: if the file could not be opened (or created when
        necessary).
      FileLockTimeoutError: if the file could not be locked before the timeout
        elapsed.
    """
        if self._locked:
            return
        try:
            self._file = FileWriter(self._path)
        except Error as e:
            raise FileLockLockingError(e)

        max_wait_ms = None
        if self._timeout_secs is not None:
            max_wait_ms = 1000 * self._timeout_secs

        r = retry.Retryer(max_wait_ms=max_wait_ms)
        try:
            r.RetryOnException(self._impl.TryLock,
                               args=[self._file.fileno()],
                               sleep_ms=100)
        except retry.RetryException as e:
            self._file.close()
            self._file = None
            raise FileLockTimeoutError(
                'Timed-out waiting to lock file: {0}'.format(self._path))
        else:
            self._locked = True
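
`self._impl.TryLock` is platform-specific and not shown. On POSIX it presumably reduces to a non-blocking flock, which raises while another process holds the lock, giving `RetryOnException` something to retry on; a minimal sketch under that assumption:

import fcntl


class _PosixLockImpl(object):
  """Hypothetical POSIX lock primitive matching the interface used above."""

  def TryLock(self, fd):
    # Non-blocking exclusive lock: raises OSError while another process holds
    # the lock, which is exactly what the surrounding retryer retries on.
    fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)

  def Unlock(self, fd):
    fcntl.flock(fd, fcntl.LOCK_UN)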
def WaitForRestoreToFinish(restore,
                           max_wait_ms=1800000,
                           exponential_sleep_multiplier=1.4,
                           jitter_ms=1000,
                           wait_ceiling_ms=180000,
                           status_update=_RestoreStatusUpdate,
                           sleep_ms=2000,
                           client=None):
  """Waits for restore resource to be terminal state."""
  if not client:
    client = GetClientInstance()
  messages = GetMessagesModule()
  retryer = retry.Retryer(
      max_retrials=None,
      max_wait_ms=max_wait_ms,
      exponential_sleep_multiplier=exponential_sleep_multiplier,
      jitter_ms=jitter_ms,
      wait_ceiling_ms=wait_ceiling_ms,
      status_update_func=status_update)
  restore_poller = RestorePoller(client, messages)
  try:
    result = retryer.RetryOnResult(
        func=restore_poller.Poll,
        args=(restore,),
        should_retry_if=restore_poller.IsNotDone,
        sleep_ms=sleep_ms)
    log.Print('Restore completed. Restore state: {0}'.format(result.state))
    return result
  # No need to catch MaxRetrialsException since we retry unlimitedly.
  except retry.WaitException:
    raise WaitForCompletionTimeoutError(
        'Timeout waiting for restore to complete. Restore is not completed, use "gcloud container backup-restore restores describe" command to check restore status.'
    )
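
`_RestoreStatusUpdate`, the default `status_update` callback, is not shown. Since `status_update_func` callbacks in these examples receive the latest result and the retry state, a plausible sketch (an assumption) is:

def _RestoreStatusUpdate(result, unused_retry_state):
  """Assumed default status callback: report the restore state while polling."""
  log.Print('Waiting for restore to complete. Current state: {0}'.format(
      result.state))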
Example #18
def _UploadFile(file_upload_task):
  """Upload a single file to Google Cloud Storage.

  Args:
    file_upload_task: FileUploadTask describing the file to upload

  Returns:
    None if the file was uploaded successfully or a stringified Exception if one
    was raised
  """
  storage_client = storage_api.StorageClient()
  bucket_ref = storage_util.BucketReference.FromBucketUrl(
      file_upload_task.bucket_url)
  retryer = retry.Retryer(max_retrials=3)

  path = file_upload_task.path
  sha1_hash = file_upload_task.sha1_hash
  log.debug('Uploading [{f}] to [{gcs}]'.format(f=path, gcs=sha1_hash))
  try:
    retryer.RetryOnException(
        storage_client.CopyFileToGCS,
        args=(bucket_ref, path, sha1_hash)
    )
  except Exception as err:  # pylint: disable=broad-except
    # pass all errors through as strings (not all exceptions can be serialized)
    return str(err)
  return None
Example #19
    def __init__(self,
                 remote,
                 port=None,
                 identity_file=None,
                 options=None,
                 extra_flags=None,
                 max_wait_ms=60 * 1000,
                 sleep_ms=5 * 1000):
        """Construct a poller for an SSH connection.

    Args:
      remote: Remote, the remote to poll.
      port: int, port to poll.
      identity_file: str, path to private key file.
      options: {str: str}, options (`-o`) for OpenSSH, see `ssh_config(5)`.
      extra_flags: [str], extra flags to append to ssh invocation. Both binary
        style flags `['-b']` and flags with values `['-k', 'v']` are accepted.
      max_wait_ms: int, number of ms to wait before raising.
      sleep_ms: int, time between trials.
    """
        self.ssh_command = SSHCommand(remote,
                                      port=port,
                                      identity_file=identity_file,
                                      options=options,
                                      extra_flags=extra_flags,
                                      remote_command=['true'],
                                      tty=False)
        self._sleep_ms = sleep_ms
        self._retryer = retry.Retryer(max_wait_ms=max_wait_ms, jitter_ms=0)
Example #20
def WaitForOperation(client,
                     messages,
                     operation,
                     description,
                     extra_stages=None):
    """Wait for a long-running operation (LRO) to complete.

  Args:
    client: The GCFv2 API client.
    messages: The GCFv2 message stubs.
    operation: The operation message response.
    description: str, the description of the waited operation.
    extra_stages: List[progress_tracker.Stage]|None, list of optional stages for
      the progress tracker to watch. The GCF v2 API returns unexpected stages
      in the case of rollbacks.
  """
    request = messages.CloudfunctionsProjectsLocationsOperationsGetRequest(
        name=operation.name)
    # Wait for stages to be loaded.
    with progress_tracker.ProgressTracker('Preparing function') as tracker:
        retryer = retry.Retryer(max_wait_ms=MAX_WAIT_MS)
        try:
            # List[progress_tracker.Stage]
            stages = retryer.RetryOnResult(_GetStages,
                                           args=[client, request, messages],
                                           should_retry_if=None,
                                           sleep_ms=SLEEP_MS)
        except retry.WaitException:
            raise exceptions.FunctionsError(
                'Operation {0} is taking too long'.format(request.name))

    if extra_stages is not None:
        stages += extra_stages

    # Wait for LRO to complete.
    description += '...'
    with progress_tracker.StagedProgressTracker(description,
                                                stages) as tracker:
        retryer = retry.Retryer(max_wait_ms=MAX_WAIT_MS)
        try:
            retryer.RetryOnResult(_GetOperationStatus,
                                  args=[client, request, tracker, messages],
                                  should_retry_if=False,
                                  sleep_ms=SLEEP_MS)
        except retry.WaitException:
            raise exceptions.FunctionsError(
                'Operation {0} is taking too long'.format(request.name))
Example #21
def _DeleteObject(value):
    """Complete one ObjectDeleteTask (safe to run in parallel)."""
    object_delete_task, callback = value
    storage_client = storage_api.StorageClient()
    retry.Retryer(max_retrials=3).RetryOnException(
        storage_client.DeleteObject, args=(object_delete_task.obj_ref, ))
    if callback:
        callback()
Example #22
def _UploadFile(value):
    """Complete one FileUploadTask (safe to run in parallel)."""
    file_upload_task, callback = value
    storage_client = storage_api.StorageClient()
    retry.Retryer(max_retrials=3).RetryOnException(
        storage_client.CopyFileToGCS,
        args=(file_upload_task.local_path, file_upload_task.dest_obj_ref))
    if callback:
        callback()
Example #23
def block_until_operation_created(name):
    """Blocks until job creates an operation and returns operation name."""
    return retry.Retryer().RetryOnResult(
        api_get,
        args=[name],
        should_retry_if=_has_not_created_operation,
        sleep_ms=(
            properties.VALUES.transfer.no_async_polling_interval_ms.GetInt()),
    ).latestOperationName
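
`_has_not_created_operation` is not shown. Since the final result's `latestOperationName` is read once retrying stops, a plausible predicate (an assumption) is:

def _has_not_created_operation(job, unused_retry_state):
  """Assumed predicate: keep polling until the job reports an operation name."""
  return not getattr(job, 'latestOperationName', None)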
 def _WaitForValue(self, cb, name, value='', text=''):
     r = retry.Retryer(max_retrials=10, exponential_sleep_multiplier=2.0)
     try:
         r.RetryOnResult(self._CheckValue,
                         args=[cb, name, value, text],
                         sleep_ms=500,
                         should_retry_if=False)
     except retry.MaxRetrialsException:
         self.fail('Could not retrieve value for {0} in time'.format(name))
def _DeleteObject(object_delete_task):
  """Complete one ObjectDeleteTask (safe to run in parallel)."""
  storage_client = storage_api.StorageClient()
  bucket_ref = storage_util.BucketReference.FromBucketUrl(
      object_delete_task.bucket_url)

  retry.Retryer(max_retrials=3).RetryOnException(
      storage_client.DeleteObject,
      args=(bucket_ref, object_delete_task.remote_path))
def CopyFilesToCodeBucket(modules, bucket, source_contexts):
    """Examines modules and copies files to a Google Cloud Storage bucket.

  Args:
    modules: [(str, ModuleYamlInfo)] List of pairs of module name, and parsed
      module information.
    bucket: str, A URL to the Google Cloud Storage bucket where the files will be
      uploaded.
    source_contexts: [dict] List of json-serializable source contexts
      associated with the modules.
  Returns:
    A lookup from module name to a dictionary representing the manifest. See
    _BuildStagingDirectory.
  """
    manifests = {}
    with file_utils.TemporaryDirectory() as staging_directory:
        for (module, info) in modules:
            source_directory = os.path.dirname(info.file)
            excluded_files_regex = info.parsed.skip_files.regex

            manifest = _BuildStagingDirectory(source_directory,
                                              staging_directory, bucket,
                                              excluded_files_regex,
                                              source_contexts)
            manifests[module] = manifest

        if any(manifest for manifest in manifests.itervalues()):
            log.status.Print('Copying files to Google Cloud Storage...')
            log.status.Print('Synchronizing files to [{b}].'.format(b=bucket))
            try:
                log.SetUserOutputEnabled(False)

                def _StatusUpdate(result, unused_retry_state):
                    log.info('Error synchronizing files. Return code: {0}. '
                             'Retrying.'.format(result))

                retryer = retry.Retryer(max_retrials=3,
                                        status_update_func=_StatusUpdate)

                def _ShouldRetry(return_code, unused_retry_state):
                    return return_code != 0

                try:
                    retryer.RetryOnResult(cloud_storage.Rsync,
                                          (staging_directory, bucket),
                                          should_retry_if=_ShouldRetry)
                except retry.RetryException as e:
                    raise exceptions.ToolException((
                        'Could not synchronize files. The gsutil command exited with '
                        'status [{s}]. Command output is available in [{l}].'
                    ).format(s=e.last_result, l=log.GetLogFilePath()))
            finally:
                # Reset to the standard log level.
                log.SetUserOutputEnabled(None)

    return manifests
Example #27
def WaitFor(poller, operation_ref, message):
    """Waits with retrues for operation to be done given poller.

  Args:
    poller: OperationPoller, poller to use during retrials.
    operation_ref: object, passed to operation poller poll method.
    message: str, string to display in the progress_tracker.

  Returns:
    poller.GetResult(operation).

  Raises:
    AbortWaitError: if ctrl-c was pressed.
    TimeoutError: if the retryer finished without the operation being done.
  """
    def _CtrlCHandler(unused_signal, unused_frame):
        raise AbortWaitError('Ctrl-C aborted wait.')

    try:
        with execution_utils.CtrlCSection(_CtrlCHandler):
            try:
                with progress_tracker.ProgressTracker(message) as tracker:

                    if poller.PRE_START_SLEEP_MS:
                        _SleepMs(poller.PRE_START_SLEEP_MS)

                    def _StatusUpdate(unused_result, unused_status):
                        tracker.Tick()

                    retryer = retry.Retryer(
                        max_retrials=poller.MAX_RETRIALS,
                        max_wait_ms=poller.MAX_WAIT_MS,
                        exponential_sleep_multiplier=poller.
                        EXPONENTIAL_SLEEP_MULTIPLIER,
                        jitter_ms=poller.JITTER_MS,
                        status_update_func=_StatusUpdate)

                    def _IsNotDone(operation, unused_state):
                        return not poller.IsDone(operation)

                    operation = retryer.RetryOnResult(
                        func=poller.Poll,
                        args=(operation_ref, ),
                        should_retry_if=_IsNotDone,
                        sleep_ms=poller.SLEEP_MS)
            except retry.RetryException:
                raise TimeoutError(
                    'Operation {0} has not finished in {1} seconds'.format(
                        operation_ref, int(poller.MAX_WAIT_MS / 1000)))
    except AbortWaitError:
        # Write this out now that progress tracker is done.
        sys.stderr.write(
            'Aborting wait for operation {0}.\n'.format(operation_ref))
        raise

    return poller.GetResult(operation)
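
A minimal sketch of the poller interface `WaitFor` relies on, inferred from the attributes and methods it touches above; the class name, constructor, and concrete constant values are assumptions.

class OperationPoller(object):
  """Hypothetical poller satisfying the interface WaitFor expects."""

  PRE_START_SLEEP_MS = 1000
  MAX_RETRIALS = None
  MAX_WAIT_MS = 300 * 1000
  EXPONENTIAL_SLEEP_MULTIPLIER = 1.4
  JITTER_MS = 1000
  SLEEP_MS = 2000

  def __init__(self, operation_service):
    self._operation_service = operation_service

  def Poll(self, operation_ref):
    # Fetch the latest state of the operation; the Request() shape is assumed.
    return self._operation_service.Get(operation_ref.Request())

  def IsDone(self, operation):
    return bool(operation.done)

  def GetResult(self, operation):
    return operation.response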
Example #28
def WaitForOperation(op_name, client):
    """Waits for an operation to complete.

  Args:
    op_name: The name of the operation on which to wait.
    client: The client object that contains the GetOperation request object.

  Raises:
    ToolException: if the operation does not complete in time.
    OperationErrorException: if the operation fails.

  Returns:
    The Operation object, if successful. Raises an exception on failure.
  """
    WaitForOperation.operation_response = None

    messages = GetMessagesModule()

    def _CheckOperation(op_name):  # pylint: disable=missing-docstring
        op_name = ParseOperationName(op_name)

        request = messages.ServicemanagementOperationsGetRequest(
            operationsId=op_name, )

        try:
            result = client.operations.Get(request)
        except apitools_exceptions.HttpError as error:
            raise exceptions.HttpException(GetError(error))

        if result.done:
            WaitForOperation.operation_response = result
            return True
        else:
            return False

    # Wait for no more than 30 minutes while retrying the Operation retrieval
    try:
        retry.Retryer(exponential_sleep_multiplier=1.1,
                      wait_ceiling_ms=10000,
                      max_wait_ms=30 * 60 * 1000).RetryOnResult(
                          _CheckOperation, [op_name],
                          should_retry_if=False,
                          sleep_ms=1500)
    # With no max_retrials set, a timeout surfaces as WaitException, so catch
    # the shared RetryException base class.
    except retry.RetryException:
        raise exceptions.ToolException('Timed out while waiting for '
                                       'operation %s. Note that the operation '
                                       'is still pending.' % op_name)

    # Check to see if the operation resulted in an error
    if WaitForOperation.operation_response.error is not None:
        raise OperationErrorException(
            'The operation with ID {0} resulted in a failure.'.format(op_name))

    # If we've gotten this far, the operation completed successfully,
    # so return the Operation object
    return WaitForOperation.operation_response
    def Run(self, args):
        dataproc = dp.Dataproc(self.ReleaseTrack())

        cluster_ref = args.CONCEPTS.cluster.Parse()

        request = None
        if args.tarball_access is not None:
            tarball_access = arg_utils.ChoiceToEnum(
                args.tarball_access, dataproc.messages.DiagnoseClusterRequest.
                TarballAccessValueValuesEnum)
            diagnose_request = dataproc.messages.DiagnoseClusterRequest(
                tarballAccess=tarball_access)
            request = dataproc.messages.DataprocProjectsRegionsClustersDiagnoseRequest(
                clusterName=cluster_ref.clusterName,
                region=cluster_ref.region,
                projectId=cluster_ref.projectId,
                diagnoseClusterRequest=diagnose_request)
        else:
            request = dataproc.messages.DataprocProjectsRegionsClustersDiagnoseRequest(
                clusterName=cluster_ref.clusterName,
                region=cluster_ref.region,
                projectId=cluster_ref.projectId)

        operation = dataproc.client.projects_regions_clusters.Diagnose(request)
        # TODO(b/36052522): Stream output during polling.
        operation = util.WaitForOperation(
            dataproc,
            operation,
            message='Waiting for cluster diagnose operation',
            timeout_s=args.timeout)

        if not operation.response:
            raise exceptions.OperationError('Operation is missing response')

        properties = encoding.MessageToDict(operation.response)
        output_uri = properties['outputUri']

        if not output_uri:
            raise exceptions.OperationError('Response is missing outputUri')

        log.err.Print('Output from diagnostic:')
        log.err.Print('-----------------------------------------------')
        driver_log_stream = storage_helpers.StorageObjectSeriesStream(
            output_uri)
        # A single read might not consume the whole stream. Try a few times.
        read_retrier = retry.Retryer(max_retrials=4, jitter_ms=None)
        try:
            read_retrier.RetryOnResult(
                lambda: driver_log_stream.ReadIntoWritable(log.err),
                sleep_ms=100,
                should_retry_if=lambda *_: driver_log_stream.open)
        except retry.MaxRetrialsException:
            log.warning('Diagnostic finished successfully, '
                        'but output did not finish streaming.')
        log.err.Print('-----------------------------------------------')
        return output_uri
def _UploadFile(file_upload_task):
  """Complete one FileUploadTask (safe to run in parallel)."""
  storage_client = storage_api.StorageClient()
  bucket_ref = storage_util.BucketReference.FromBucketUrl(
      file_upload_task.bucket_url)

  local_path = file_upload_task.local_path
  retry.Retryer(max_retrials=3).RetryOnException(
      storage_client.CopyFileToGCS,
      args=(bucket_ref, local_path, file_upload_task.remote_path))