def _DeployFunction(self, *args, **kwargs):
  name = kwargs.pop('name', self._GenerateFunctionName())
  contents = kwargs.pop('function_content', FUNCTION_JS_FILE)
  command_args = []
  source = kwargs.pop('source', '')
  self._WriteFunctionSource(name, source or self.function_path,
                            content=contents)
  if source:
    source = '--source {}'.format(source)
  for no_value_flag in args:
    command_args.append(no_value_flag.replace('_', '-'))
  for flag, flag_value in six.iteritems(kwargs):
    command_args.append('--{}'.format(flag.replace('_', '-')))
    command_args.append(flag_value)
  command_args.append('--entry-point {}'.format(name))
  try:
    command = 'functions deploy {name} {source} {args}'.format(
        name=name, source=source, args=' '.join(command_args))
    self.Run(command)
    yield name
  finally:
    delete_retryer = retry.Retryer(max_retrials=3,
                                   exponential_sleep_multiplier=2)
    delete_retryer.RetryOnException(
        self.Run, ['functions delete {} --quiet'.format(name)])

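# Note: _DeployFunction above yields inside try/finally, so it is presumably
# consumed as a context manager by the enclosing test class. A minimal usage
# sketch, assuming the method is wrapped with contextlib.contextmanager (an
# assumption; the decorator is not shown in the snippet):
#
#   @contextlib.contextmanager
#   def _DeployFunction(self, *args, **kwargs):
#     ...
#
#   with self._DeployFunction('--trigger_http', memory='128MB') as name:
#     self.Run('functions call {}'.format(name))
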
def Execute(self, callback=None):
  storage_client = storage_api.StorageClient()
  retry.Retryer(max_retrials=3).RetryOnException(
      storage_client.CopyFileToGCS,
      args=(self.source_local_path, self.dest_obj_ref))
  if callback:
    callback()

def testLogEntries(self):

  def FindLogEntry(parent, log_id, flag):
    """Try to read one of the entries we just wrote."""
    log_filter = 'logName={0}/logs/{1}'.format(parent, log_id)
    entry = self.RunLogging(
        'read {0} --freshness=1h --limit=1 {1}'.format(log_filter, flag))
    return any(entry)

  for parent, flag in self._parents_and_flags:
    log_id = next(self._name_generator)
    self.RunLogging('write {0} hello {1}'.format(log_id, flag))
    self.RunLogging(
        'write {0} urgent_hello --severity=ERROR {1}'.format(log_id, flag))
    self.RunLogging('write %s \'{"a": "hello"}\' --payload-type=json %s'
                    % (log_id, flag))
    try:
      # Total retry time of 120 sec.
      retries_ms = (15000, 30000, 30000, 45000)
      # Retry if the log id was not found; it should be visible shortly
      # after the "write" command.
      retry.Retryer().RetryOnResult(
          FindLogEntry,
          args=[parent, log_id, flag],
          should_retry_if=False,
          sleep_ms=retries_ms)
    except retry.MaxRetrialsException:
      raise Exception('Retry limit exceeded. Note that this test relies on '
                      'Bigtable replication and may occasionally be flaky')

def _SetDefaultVersion(new_version, api_client):
  """Sets the given version as the default.

  Args:
    new_version: Version, The version to promote.
    api_client: appengine_api_client.AppengineApiClient to use to make
      requests.
  """
  metrics.CustomTimedEvent(metric_names.SET_DEFAULT_VERSION_API_START)

  # TODO(b/31824825): It sometimes takes a while for a new service to show
  # up. Retry it if we get a service not found error.
  def ShouldRetry(exc_type, unused_exc_value, unused_traceback, unused_state):
    return issubclass(exc_type, core_api_exceptions.HttpException)

  try:
    retryer = retry.Retryer(max_retrials=3, exponential_sleep_multiplier=2)
    retryer.RetryOnException(api_client.SetDefaultVersion,
                             [new_version.service, new_version.id],
                             should_retry_if=ShouldRetry, sleep_ms=1000)
  except retry.MaxRetrialsException as e:
    (unused_result, exc_info) = e.last_result
    if exc_info:
      # exc_info is the (type, value, traceback) tuple of the last exception
      # the function threw; re-raise it with the traceback preserved.
      six.reraise(exc_info[0], exc_info[1], exc_info[2])
    else:
      # This shouldn't happen, but if we don't have the exception info for
      # some reason, just convert the MaxRetrialsException.
      raise exceptions.InternalError()
  metrics.CustomTimedEvent(metric_names.SET_DEFAULT_VERSION_API)

def Execute(self, callback=None):
  """Complete one ObjectDeleteTask (safe to run in parallel)."""
  storage_client = storage_api.StorageClient()
  retry.Retryer(max_retrials=3).RetryOnException(
      storage_client.DeleteObject, args=(self.obj_ref,))
  if callback:
    callback()

def Enable(self, feature):
  project = properties.VALUES.core.project.GetOrFail()
  enable_api.EnableServiceIfDisabled(project, self.feature.api)
  parent = util.LocationResourceName(project)
  try:
    # Retry if we still get "API not activated"; it can take a few minutes
    # for Chemist to catch up. See b/28800908.
    # TODO(b/177098463): Add a spinner here?
    retryer = retry.Retryer(max_retrials=4, exponential_sleep_multiplier=1.75)
    op = retryer.RetryOnException(
        self.hubclient.CreateFeature,
        args=(parent, self.feature_name, feature),
        should_retry_if=self._FeatureAPINotEnabled,
        sleep_ms=1000)
  except retry.MaxRetrialsException:
    raise exceptions.Error(
        'Retry limit exceeded waiting for {} to enable'.format(
            self.feature.api))
  except apitools_exceptions.HttpConflictError as e:
    # If the error is not due to the object already existing, re-raise.
    error = core_api_exceptions.HttpErrorPayload(e)
    if error.status_description != 'ALREADY_EXISTS':
      raise
    # TODO(b/177098463): Decide if this should be a hard error if a spec was
    # set, but not applied, because the Feature already existed.
    log.status.Print('{} Feature for project [{}] is already enabled'.format(
        self.feature.display_name, project))
    return
  msg = 'Waiting for Feature {} to be created'.format(
      self.feature.display_name)
  return self.WaitForHubOp(self.hubclient.feature_waiter, op=op, message=msg)

def WaitForBreakpoint(self, breakpoint_id, timeout=None, retry_ms=500):
  """Waits for a breakpoint to be completed.

  Args:
    breakpoint_id: A breakpoint ID.
    timeout: The number of seconds to wait for completion.
    retry_ms: Milliseconds to wait between retries.

  Returns:
    The Breakpoint message, or None if the breakpoint did not complete
    before the timeout.
  """
  retryer = retry.Retryer(
      max_wait_ms=1000 * timeout if timeout is not None else None,
      wait_ceiling_ms=1000)
  request = (self._debug_messages.
             ClouddebuggerDebuggerDebuggeesBreakpointsGetRequest(
                 breakpointId=breakpoint_id, debuggeeId=self.target_id,
                 clientVersion=self.CLIENT_VERSION))
  try:
    result = retryer.RetryOnResult(
        self._debug_client.debugger_debuggees_breakpoints.Get, [request],
        should_retry_if=lambda r, _: not r.breakpoint.isFinalState,
        sleep_ms=retry_ms)
  except retry.RetryException:
    # Timeout before the breakpoint was finalized.
    return None
  return self.AddTargetInfo(result.breakpoint)

def WaitForOperationResult(self, operation_ref):
  """Waits for an operation to complete.

  Args:
    operation_ref: A reference to the operation on which to wait.

  Raises:
    TimeoutError: if the operation does not complete in time.

  Returns:
    The Operation object, if successful. Raises an exception on failure.
  """
  # Wait for no more than 30 minutes while retrying the Operation retrieval.
  try:
    retryer = retry.Retryer(exponential_sleep_multiplier=1.1,
                            wait_ceiling_ms=10000,
                            max_wait_ms=30 * 60 * 1000)
    result = retryer.RetryOnResult(self.CheckResult, [operation_ref],
                                   should_retry_if=None, sleep_ms=1500)
  except retry.MaxRetrialsException:
    raise exceptions.TimeoutError(
        'Timed out while waiting for operation {}. Note that the operation '
        'is still pending.'.format(operation_ref.Name()))
  return result

def wrapper(*args, **kwargs):
  # `func` and `retryIf` come from an enclosing scope; this is the inner
  # function of a retry decorator (see the sketch below).
  retryer = retry.Retryer(max_retrials=3, exponential_sleep_multiplier=2)
  return retryer.RetryOnException(func, args, kwargs,
                                  should_retry_if=retryIf)

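# A sketch of the enclosing decorator implied by the wrapper fragment above.
# The outer shape and the name RetryOnExceptionDecorator are assumptions;
# only `func`, `retryIf`, and the wrapper body come from the original.
import functools

def RetryOnExceptionDecorator(retryIf=None):
  def Decorator(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
      retryer = retry.Retryer(max_retrials=3, exponential_sleep_multiplier=2)
      return retryer.RetryOnException(func, args, kwargs,
                                      should_retry_if=retryIf)
    return wrapper
  return Decorator
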
def WaitForOperation(operation_service, operation, registry=None):
  """Wait until the operation is complete or times out.

  Args:
    operation_service: The apitools service type for operations.
    operation: The operation resource to wait on.
    registry: A resource registry to use for operation get requests.

  Returns:
    The operation resource when it has completed.

  Raises:
    OperationTimeoutError: when the operation polling times out.
    OperationError: when the operation completed with an error.
  """
  if operation.done:
    return operation
  if not registry:
    registry = resources.REGISTRY
  request = registry.Parse(operation.name.split('/')[-1],
                           collection='ml.projects.operations').Request()
  try:
    operation = retry.Retryer(max_wait_ms=60 * 60 * 1000).RetryOnResult(
        operation_service.Get,
        args=(request,),
        should_retry_if=lambda op, _: not op.done,
        sleep_ms=5000)
    if operation.error:
      raise OperationError(
          requests.ExtractErrorMessage(
              encoding.MessageToPyValue(operation.error)))
    return operation
  except retry.WaitException:
    raise OperationTimeoutError(
        'Operation [{0}] timed out. This operation may still be underway.'
        .format(operation.name))

def _WaitForOperation(client, get_request, message):
  """Wait for an operation to complete.

  No operation is done instantly. Wait for it to finish following this logic:
  * we wait 1s (jitter is also 1s)
  * we query the service
  * if the operation is not finished we loop back to the first point
  * the wait limit is 620s - if we get to that point it means something is
    wrong and we can throw an exception

  Args:
    client: The client used to make requests.
    get_request: A GetOperationRequest message.
    message: str, The string to print while polling.

  Returns:
    True if the operation succeeded without error.

  Raises:
    FunctionsError: If the operation takes more than 620s.
  """
  with console_io.ProgressTracker(message, autotick=False) as pt:
    # With exponential_sleep_multiplier=1 this is effectively a linear
    # retryer: it sleeps SLEEP_MS between every attempt.
    retryer = retry.Retryer(exponential_sleep_multiplier=1,
                            max_wait_ms=MAX_WAIT_MS,
                            wait_ceiling_ms=WAIT_CEILING_MS)
    try:
      retryer.RetryOnResult(_GetOperationStatus, [client, get_request],
                            {'progress_tracker': pt},
                            should_retry_if=None, sleep_ms=SLEEP_MS)
    except retry.WaitException:
      raise exceptions.FunctionsError(
          'Operation {0} is taking too long'.format(get_request.name))

def _UploadFiles(files_to_upload, bucket_ref, storage_client):
  for sha1_hash, path in sorted(six.iteritems(files_to_upload)):
    log.debug('Uploading [{f}] to [{gcs}]'.format(f=path, gcs=sha1_hash))
    retryer = retry.Retryer(max_retrials=3)
    retryer.RetryOnException(
        cloud_storage.CopyFileToGCS,
        args=(bucket_ref, path, sha1_hash, storage_client))

def WaitOperation(name):
  """Wait until the operation is done.

  Args:
    name: The name of the operation.

  Raises:
    exceptions.OperationErrorException: when getting the operation fails.
    apitools_exceptions.HttpError: Another miscellaneous error with the
      peering service.

  Returns:
    The result of the peering operation.
  """

  def _CheckOp(name, result):  # pylint: disable=missing-docstring
    op = GetOperation(name)
    if op.done:
      result.append(op)
    return not op.done

  # Wait for no more than 30 minutes while retrying the Operation retrieval.
  result = []
  try:
    retry.Retryer(exponential_sleep_multiplier=1.1,
                  wait_ceiling_ms=10000,
                  max_wait_ms=30 * 60 * 1000).RetryOnResult(
                      _CheckOp, [name, result],
                      should_retry_if=True,
                      sleep_ms=2000)
  except retry.MaxRetrialsException:
    raise exceptions.TimeoutError('Timed out while waiting for '
                                  'operation {0}. Note that the operation '
                                  'is still pending.'.format(name))
  return result[0] if result else None

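# WaitOperation above passes a plain value (True) as should_retry_if. In the
# Cloud SDK retry module a non-callable should_retry_if is compared against
# each result, so _CheckOp is retried while it returns True (operation not
# done). Several other snippets here use should_retry_if=False the same way.
# An equivalent, arguably clearer, callable form (a sketch, not the original
# code):
#
#   retryer.RetryOnResult(_CheckOp, [name, result],
#                         should_retry_if=lambda not_done, _: not_done,
#                         sleep_ms=2000)
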
def WaitForBreakpoint(self, breakpoint_id, timeout=None, retry_ms=500,
                      should_retry_if=None):
  """Waits for a breakpoint to be completed.

  Args:
    breakpoint_id: A breakpoint ID.
    timeout: The number of seconds to wait for completion.
    retry_ms: Milliseconds to wait between retries.
    should_retry_if: A function that accepts a Breakpoint message and
      returns True if the breakpoint wait is not finished. If not specified,
      defaults to a function which just checks the isFinalState flag.

  Returns:
    The Breakpoint message, or None if the breakpoint did not complete
    before the timeout.
  """
  if not should_retry_if:
    should_retry_if = lambda r, _: not r.breakpoint.isFinalState
  retryer = retry.Retryer(
      max_wait_ms=1000 * timeout if timeout is not None else None,
      wait_ceiling_ms=1000)
  request = (self._debug_messages.
             ClouddebuggerDebuggerDebuggeesBreakpointsGetRequest(
                 breakpointId=breakpoint_id, debuggeeId=self.target_id,
                 clientVersion=self.CLIENT_VERSION))
  try:
    result = retryer.RetryOnResult(self._CallGet, [request],
                                   should_retry_if=should_retry_if,
                                   sleep_ms=retry_ms)
  except retry.RetryException:
    # Timeout before the breakpoint was finalized.
    return None
  if not result.breakpoint.isFinalState:
    return None
  return self.AddTargetInfo(result.breakpoint)

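# A hedged usage sketch for WaitForBreakpoint with a custom predicate: wait
# up to 60s, treating the breakpoint as unfinished until it is final. The
# `debugger` object and the stackFrames field check are assumptions for
# illustration only; the predicate receives the Get response, whose
# .breakpoint field is inspected, as in the default lambda above.
#
#   bp = debugger.WaitForBreakpoint(
#       breakpoint_id,
#       timeout=60,
#       should_retry_if=lambda r, _: (not r.breakpoint.isFinalState or
#                                     not r.breakpoint.stackFrames))
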
def _WhitelistClientIP(instance_ref, sql_client, sql_messages, resources):
  """Add CLIENT_IP to the authorized networks list.

  Makes an API call to add CLIENT_IP to the authorized networks list.
  The server knows to interpret the string CLIENT_IP as the address with
  which the client reaches the server. This IP will be whitelisted for
  1 minute.

  Args:
    instance_ref: resources.Resource, The instance we're connecting to.
    sql_client: apitools.BaseApiClient, A working client for the sql version
      to be used.
    sql_messages: module, The module that defines the messages for the sql
      version to be used.
    resources: resources.Registry, The registry that can create resource
      refs for the sql version to be used.

  Returns:
    string, The name of the authorized network rule. Callers can use this
    name to find out the IP the client reached the server with.
  """
  datetime_now = datetime.datetime.now(
      protorpc.util.TimeZoneOffset(datetime.timedelta(0)))

  acl_name = 'sql connect at time {0}'.format(datetime_now)
  user_acl = sql_messages.AclEntry(
      name=acl_name,
      expirationTime=datetime_now + datetime.timedelta(minutes=1),
      value='CLIENT_IP')

  try:
    original = sql_client.instances.Get(instance_ref.Request())
  except apitools_base.HttpError as error:
    raise exceptions.HttpException(util.GetErrorMessage(error))

  original.settings.ipConfiguration.authorizedNetworks.append(user_acl)
  patch_request = sql_messages.SqlInstancesPatchRequest(
      databaseInstance=original,
      project=instance_ref.project,
      instance=instance_ref.instance)
  result = sql_client.instances.Patch(patch_request)

  operation_ref = resources.Create('sql.operations',
                                   operation=result.name,
                                   project=instance_ref.project,
                                   instance=instance_ref.instance)
  message = 'Whitelisting your IP for incoming connection for 1 minute'

  # Due to eventual consistency, the server might not know of the operation
  # we just issued above, and throw an exception.
  # This retry is not a polling of the operation itself, but a retry until
  # the server knows the operation actually exists.
  try:
    retryer = retry.Retryer(max_retrials=2, exponential_sleep_multiplier=2)
    retryer.RetryOnException(util.WaitForOperationV1Beta4,
                             [sql_client, operation_ref, message],
                             sleep_ms=500)
  except retry.RetryException:
    raise exceptions.ToolException('Could not whitelist client IP.')

  return acl_name

def Lock(self):
  """Opens and locks the file. A no-op if this FileLock is already locked.

  The lock file is created if it does not already exist.

  Raises:
    FileLockLockingError: if the file could not be opened (or created when
      necessary).
    FileLockTimeoutError: if the file could not be locked before the timeout
      elapsed.
  """
  if self._locked:
    return
  try:
    self._file = FileWriter(self._path)
  except Error as e:
    raise FileLockLockingError(e)

  max_wait_ms = None
  if self._timeout_secs is not None:
    max_wait_ms = 1000 * self._timeout_secs

  r = retry.Retryer(max_wait_ms=max_wait_ms)
  try:
    r.RetryOnException(self._impl.TryLock, args=[self._file.fileno()],
                       sleep_ms=100)
  except retry.RetryException:
    self._file.close()
    self._file = None
    raise FileLockTimeoutError(
        'Timed-out waiting to lock file: {0}'.format(self._path))
  else:
    self._locked = True

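# A minimal usage sketch for the FileLock above. The FileLock(path,
# timeout_secs=...) constructor and the Unlock() counterpart are assumptions;
# only Lock() appears in the snippet.
#
#   lock = FileLock('/tmp/example.lock', timeout_secs=5)
#   lock.Lock()  # blocks up to 5s, then raises FileLockTimeoutError
#   try:
#     ...  # critical section
#   finally:
#     lock.Unlock()  # assumed counterpart to Lock()
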
def WaitForRestoreToFinish(restore,
                           max_wait_ms=1800000,
                           exponential_sleep_multiplier=1.4,
                           jitter_ms=1000,
                           wait_ceiling_ms=180000,
                           status_update=_RestoreStatusUpdate,
                           sleep_ms=2000,
                           client=None):
  """Waits for the restore resource to reach a terminal state."""
  if not client:
    client = GetClientInstance()
  messages = GetMessagesModule()
  retryer = retry.Retryer(
      max_retrials=None,
      max_wait_ms=max_wait_ms,
      exponential_sleep_multiplier=exponential_sleep_multiplier,
      jitter_ms=jitter_ms,
      wait_ceiling_ms=wait_ceiling_ms,
      status_update_func=status_update)
  restore_poller = RestorePoller(client, messages)
  try:
    result = retryer.RetryOnResult(
        func=restore_poller.Poll,
        args=(restore,),
        should_retry_if=restore_poller.IsNotDone,
        sleep_ms=sleep_ms)
    log.Print('Restore completed. Restore state: {0}'.format(result.state))
    return result
  # No need to catch MaxRetrialsException since we retry without limit.
  except retry.WaitException:
    raise WaitForCompletionTimeoutError(
        'Timeout waiting for restore to complete. Restore is not completed; '
        'use the "gcloud container backup-restore restores describe" command '
        'to check restore status.')

def _UploadFile(file_upload_task):
  """Upload a single file to Google Cloud Storage.

  Args:
    file_upload_task: FileUploadTask describing the file to upload.

  Returns:
    None if the file was uploaded successfully, or a stringified Exception
    if one was raised.
  """
  storage_client = storage_api.StorageClient()
  bucket_ref = storage_util.BucketReference.FromBucketUrl(
      file_upload_task.bucket_url)
  retryer = retry.Retryer(max_retrials=3)

  path = file_upload_task.path
  sha1_hash = file_upload_task.sha1_hash
  log.debug('Uploading [{f}] to [{gcs}]'.format(f=path, gcs=sha1_hash))
  try:
    retryer.RetryOnException(
        storage_client.CopyFileToGCS,
        args=(bucket_ref, path, sha1_hash))
  except Exception as err:  # pylint: disable=broad-except
    # Pass all errors through as strings (not all exceptions can be
    # serialized).
    return str(err)
  return None

def __init__(self, remote, port=None, identity_file=None,
             options=None, extra_flags=None, max_wait_ms=60 * 1000,
             sleep_ms=5 * 1000):
  """Construct a poller for an SSH connection.

  Args:
    remote: Remote, the remote to poll.
    port: int, port to poll.
    identity_file: str, path to private key file.
    options: {str: str}, options (`-o`) for OpenSSH, see `ssh_config(5)`.
    extra_flags: [str], extra flags to append to the ssh invocation. Both
      binary style flags `['-b']` and flags with values `['-k', 'v']` are
      accepted.
    max_wait_ms: int, number of ms to wait before raising.
    sleep_ms: int, time between trials.
  """
  self.ssh_command = SSHCommand(
      remote, port=port, identity_file=identity_file, options=options,
      extra_flags=extra_flags, remote_command=['true'], tty=False)
  self._sleep_ms = sleep_ms
  self._retryer = retry.Retryer(max_wait_ms=max_wait_ms, jitter_ms=0)

def WaitForOperation(client, messages, operation, description,
                     extra_stages=None):
  """Wait for a long-running operation (LRO) to complete.

  Args:
    client: The GCFv2 API client.
    messages: The GCFv2 message stubs.
    operation: The operation message response.
    description: str, the description of the waited operation.
    extra_stages: List[progress_tracker.Stage]|None, list of optional stages
      for the progress tracker to watch. The GCF 2nd gen API returns
      unexpected stages in the case of rollbacks.
  """
  request = messages.CloudfunctionsProjectsLocationsOperationsGetRequest(
      name=operation.name)
  # Wait for stages to be loaded.
  with progress_tracker.ProgressTracker('Preparing function') as tracker:
    retryer = retry.Retryer(max_wait_ms=MAX_WAIT_MS)
    try:
      # List[progress_tracker.Stage]
      stages = retryer.RetryOnResult(
          _GetStages, args=[client, request, messages],
          should_retry_if=None, sleep_ms=SLEEP_MS)
    except retry.WaitException:
      raise exceptions.FunctionsError(
          'Operation {0} is taking too long'.format(request.name))

  if extra_stages is not None:
    stages += extra_stages

  # Wait for the LRO to complete.
  description += '...'
  with progress_tracker.StagedProgressTracker(description,
                                              stages) as tracker:
    retryer = retry.Retryer(max_wait_ms=MAX_WAIT_MS)
    try:
      retryer.RetryOnResult(
          _GetOperationStatus,
          args=[client, request, tracker, messages],
          should_retry_if=False, sleep_ms=SLEEP_MS)
    except retry.WaitException:
      raise exceptions.FunctionsError(
          'Operation {0} is taking too long'.format(request.name))

def _DeleteObject(value):
  """Complete one ObjectDeleteTask (safe to run in parallel)."""
  object_delete_task, callback = value
  storage_client = storage_api.StorageClient()
  retry.Retryer(max_retrials=3).RetryOnException(
      storage_client.DeleteObject, args=(object_delete_task.obj_ref,))
  if callback:
    callback()

def _UploadFile(value):
  """Complete one FileUploadTask (safe to run in parallel)."""
  file_upload_task, callback = value
  storage_client = storage_api.StorageClient()
  retry.Retryer(max_retrials=3).RetryOnException(
      storage_client.CopyFileToGCS,
      args=(file_upload_task.local_path, file_upload_task.dest_obj_ref))
  if callback:
    callback()

def block_until_operation_created(name):
  """Blocks until job creates an operation and returns operation name."""
  return retry.Retryer().RetryOnResult(
      api_get,
      args=[name],
      should_retry_if=_has_not_created_operation,
      sleep_ms=(
          properties.VALUES.transfer.no_async_polling_interval_ms.GetInt()),
  ).latestOperationName

def _WaitForValue(self, cb, name, value='', text=''):
  r = retry.Retryer(max_retrials=10, exponential_sleep_multiplier=2.0)
  try:
    r.RetryOnResult(self._CheckValue, args=[cb, name, value, text],
                    sleep_ms=500, should_retry_if=False)
  except retry.MaxRetrialsException:
    self.fail('Could not retrieve value for {0} in time'.format(name))

def _DeleteObject(object_delete_task):
  """Complete one ObjectDeleteTask (safe to run in parallel)."""
  storage_client = storage_api.StorageClient()
  bucket_ref = storage_util.BucketReference.FromBucketUrl(
      object_delete_task.bucket_url)
  retry.Retryer(max_retrials=3).RetryOnException(
      storage_client.DeleteObject,
      args=(bucket_ref, object_delete_task.remote_path))

def CopyFilesToCodeBucket(modules, bucket, source_contexts):
  """Examines modules and copies files to a Google Cloud Storage bucket.

  Args:
    modules: [(str, ModuleYamlInfo)] List of pairs of module name, and
      parsed module information.
    bucket: str A URL to the Google Cloud Storage bucket where the files
      will be uploaded.
    source_contexts: [dict] List of json-serializable source contexts
      associated with the modules.

  Returns:
    A lookup from module name to a dictionary representing the manifest. See
    _BuildStagingDirectory.
  """
  manifests = {}
  with file_utils.TemporaryDirectory() as staging_directory:
    for (module, info) in modules:
      source_directory = os.path.dirname(info.file)
      excluded_files_regex = info.parsed.skip_files.regex
      manifest = _BuildStagingDirectory(source_directory,
                                        staging_directory,
                                        bucket,
                                        excluded_files_regex,
                                        source_contexts)
      manifests[module] = manifest

    if any(manifest for manifest in six.itervalues(manifests)):
      log.status.Print('Copying files to Google Cloud Storage...')
      log.status.Print('Synchronizing files to [{b}].'.format(b=bucket))
      try:
        log.SetUserOutputEnabled(False)

        def _StatusUpdate(result, unused_retry_state):
          log.info('Error synchronizing files. Return code: {0}. '
                   'Retrying.'.format(result))

        retryer = retry.Retryer(max_retrials=3,
                                status_update_func=_StatusUpdate)

        def _ShouldRetry(return_code, unused_retry_state):
          return return_code != 0

        try:
          retryer.RetryOnResult(cloud_storage.Rsync,
                                (staging_directory, bucket),
                                should_retry_if=_ShouldRetry)
        except retry.RetryException as e:
          raise exceptions.ToolException((
              'Could not synchronize files. The gsutil command exited with '
              'status [{s}]. Command output is available in [{l}].'
          ).format(s=e.last_result, l=log.GetLogFilePath()))
      finally:
        # Reset to the standard log level.
        log.SetUserOutputEnabled(None)

  return manifests

def WaitFor(poller, operation_ref, message):
  """Waits, with retries, for the operation to be done, given a poller.

  Args:
    poller: OperationPoller, poller to use during retrials.
    operation_ref: object, passed to the operation poller's Poll method.
    message: str, string to display for the progress_tracker.

  Returns:
    poller.GetResult(operation).

  Raises:
    AbortWaitError: if ctrl-c was pressed.
    TimeoutError: if the retryer finished without the operation being done.
  """

  def _CtrlCHandler(unused_signal, unused_frame):
    raise AbortWaitError('Ctrl-C aborted wait.')

  try:
    with execution_utils.CtrlCSection(_CtrlCHandler):
      try:
        with progress_tracker.ProgressTracker(message) as tracker:
          if poller.PRE_START_SLEEP_MS:
            _SleepMs(poller.PRE_START_SLEEP_MS)

          def _StatusUpdate(unused_result, unused_status):
            tracker.Tick()

          retryer = retry.Retryer(
              max_retrials=poller.MAX_RETRIALS,
              max_wait_ms=poller.MAX_WAIT_MS,
              exponential_sleep_multiplier=poller.EXPONENTIAL_SLEEP_MULTIPLIER,
              jitter_ms=poller.JITTER_MS,
              status_update_func=_StatusUpdate)

          def _IsNotDone(operation, unused_state):
            return not poller.IsDone(operation)

          operation = retryer.RetryOnResult(
              func=poller.Poll,
              args=(operation_ref,),
              should_retry_if=_IsNotDone,
              sleep_ms=poller.SLEEP_MS)
      except retry.RetryException:
        raise TimeoutError(
            'Operation {0} has not finished in {1} seconds'.format(
                operation_ref, int(poller.MAX_WAIT_MS / 1000)))
  except AbortWaitError:
    # Write this out now that the progress tracker is done.
    sys.stderr.write(
        'Aborting wait for operation {0}.\n'.format(operation_ref))
    raise
  return poller.GetResult(operation)

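# WaitFor above expects `poller` to expose class-level retry knobs plus
# IsDone/Poll/GetResult. A minimal sketch of that interface, inferred purely
# from the attribute accesses in WaitFor; the class name, constructor, and
# Poll body are assumptions, and the knob values are illustrative only.
class _ExampleOperationPoller(object):

  PRE_START_SLEEP_MS = 1000
  MAX_RETRIALS = None
  MAX_WAIT_MS = 30 * 60 * 1000
  EXPONENTIAL_SLEEP_MULTIPLIER = 1.4
  JITTER_MS = 1000
  SLEEP_MS = 2000

  def __init__(self, operations_service):
    self._operations_service = operations_service

  def IsDone(self, operation):
    return operation.done

  def Poll(self, operation_ref):
    # Hypothetical: fetch the latest operation state from the service.
    return self._operations_service.Get(operation_ref.Request())

  def GetResult(self, operation):
    return operation.response
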
def WaitForOperation(op_name, client):
  """Waits for an operation to complete.

  Args:
    op_name: The name of the operation on which to wait.
    client: The client object that contains the GetOperation request object.

  Raises:
    ToolException: if the operation does not complete in time.
    OperationErrorException: if the operation fails.

  Returns:
    The Operation object, if successful. Raises an exception on failure.
  """
  WaitForOperation.operation_response = None
  messages = GetMessagesModule()

  def _CheckOperation(op_name):  # pylint: disable=missing-docstring
    op_name = ParseOperationName(op_name)

    request = messages.ServicemanagementOperationsGetRequest(
        operationsId=op_name)

    try:
      result = client.operations.Get(request)
    except apitools_exceptions.HttpError as error:
      raise exceptions.HttpException(GetError(error))

    if result.done:
      WaitForOperation.operation_response = result
      return True
    else:
      return False

  # Wait for no more than 30 minutes while retrying the Operation retrieval.
  try:
    retry.Retryer(exponential_sleep_multiplier=1.1,
                  wait_ceiling_ms=10000,
                  max_wait_ms=30 * 60 * 1000).RetryOnResult(
                      _CheckOperation, [op_name], should_retry_if=False,
                      sleep_ms=1500)
  except retry.MaxRetrialsException:
    raise exceptions.ToolException('Timed out while waiting for '
                                   'operation %s. Note that the operation '
                                   'is still pending.' % op_name)

  # Check to see if the operation resulted in an error.
  if WaitForOperation.operation_response.error is not None:
    raise OperationErrorException(
        'The operation with ID {0} resulted in a failure.'.format(op_name))

  # If we've gotten this far, the operation completed successfully, so
  # return the Operation object.
  return WaitForOperation.operation_response

def Run(self, args):
  dataproc = dp.Dataproc(self.ReleaseTrack())

  cluster_ref = args.CONCEPTS.cluster.Parse()

  request = None
  if args.tarball_access is not None:
    tarball_access = arg_utils.ChoiceToEnum(
        args.tarball_access,
        dataproc.messages.DiagnoseClusterRequest.TarballAccessValueValuesEnum)
    diagnose_request = dataproc.messages.DiagnoseClusterRequest(
        tarballAccess=tarball_access)
    request = dataproc.messages.DataprocProjectsRegionsClustersDiagnoseRequest(
        clusterName=cluster_ref.clusterName,
        region=cluster_ref.region,
        projectId=cluster_ref.projectId,
        diagnoseClusterRequest=diagnose_request)
  else:
    request = dataproc.messages.DataprocProjectsRegionsClustersDiagnoseRequest(
        clusterName=cluster_ref.clusterName,
        region=cluster_ref.region,
        projectId=cluster_ref.projectId)

  operation = dataproc.client.projects_regions_clusters.Diagnose(request)

  # TODO(b/36052522): Stream output during polling.
  operation = util.WaitForOperation(
      dataproc,
      operation,
      message='Waiting for cluster diagnose operation',
      timeout_s=args.timeout)

  if not operation.response:
    raise exceptions.OperationError('Operation is missing response')

  properties = encoding.MessageToDict(operation.response)
  output_uri = properties['outputUri']

  if not output_uri:
    raise exceptions.OperationError('Response is missing outputUri')

  log.err.Print('Output from diagnostic:')
  log.err.Print('-----------------------------------------------')
  driver_log_stream = storage_helpers.StorageObjectSeriesStream(output_uri)
  # A single read might not read the whole stream. Try a few times.
  read_retrier = retry.Retryer(max_retrials=4, jitter_ms=None)
  try:
    read_retrier.RetryOnResult(
        lambda: driver_log_stream.ReadIntoWritable(log.err),
        sleep_ms=100,
        should_retry_if=lambda *_: driver_log_stream.open)
  except retry.MaxRetrialsException:
    log.warning('Diagnostic finished successfully, '
                'but output did not finish streaming.')
  log.err.Print('-----------------------------------------------')
  return output_uri

def _UploadFile(file_upload_task):
  """Complete one FileUploadTask (safe to run in parallel)."""
  storage_client = storage_api.StorageClient()
  bucket_ref = storage_util.BucketReference.FromBucketUrl(
      file_upload_task.bucket_url)
  local_path = file_upload_task.local_path
  retry.Retryer(max_retrials=3).RetryOnException(
      storage_client.CopyFileToGCS,
      args=(bucket_ref, local_path, file_upload_task.remote_path))