def Run(self, args):
        dataproc = dp.Dataproc(self.ReleaseTrack())

        cluster_ref = args.CONCEPTS.cluster.Parse()

        request = None
        if args.tarball_access is not None:
            tarball_access = arg_utils.ChoiceToEnum(
                args.tarball_access, dataproc.messages.DiagnoseClusterRequest.
                TarballAccessValueValuesEnum)
            diagnose_request = dataproc.messages.DiagnoseClusterRequest(
                tarballAccess=tarball_access)
            request = dataproc.messages.DataprocProjectsRegionsClustersDiagnoseRequest(
                clusterName=cluster_ref.clusterName,
                region=cluster_ref.region,
                projectId=cluster_ref.projectId,
                diagnoseClusterRequest=diagnose_request)
        else:
            request = dataproc.messages.DataprocProjectsRegionsClustersDiagnoseRequest(
                clusterName=cluster_ref.clusterName,
                region=cluster_ref.region,
                projectId=cluster_ref.projectId)

        operation = dataproc.client.projects_regions_clusters.Diagnose(request)
        # TODO(b/36052522): Stream output during polling.
        operation = util.WaitForOperation(
            dataproc,
            operation,
            message='Waiting for cluster diagnose operation',
            timeout_s=args.timeout)

        if not operation.response:
            raise exceptions.OperationError('Operation is missing response')

        properties = encoding.MessageToDict(operation.response)
        output_uri = properties.get('outputUri')

        if not output_uri:
            raise exceptions.OperationError('Response is missing outputUri')

        log.err.Print('Output from diagnostic:')
        log.err.Print('-----------------------------------------------')
        driver_log_stream = storage_helpers.StorageObjectSeriesStream(
            output_uri)
        # A single read might not read the whole stream. Try a few times.
        read_retrier = retry.Retryer(max_retrials=4, jitter_ms=None)
        try:
            read_retrier.RetryOnResult(
                lambda: driver_log_stream.ReadIntoWritable(log.err),
                sleep_ms=100,
                should_retry_if=lambda *_: driver_log_stream.open)
        except retry.MaxRetrialsException:
            log.warning('Diagnostic finished successfully, '
                        'but output did not finish streaming.')
        log.err.Print('-----------------------------------------------')
        return output_uri
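As the comment above notes, a single ReadIntoWritable call may return before the object series is fully written, which is why the command retries while the stream is still open. Below is a minimal, SDK-free sketch of that retry-until-closed pattern; the helper name is hypothetical, and the stream argument is assumed to expose the same ReadIntoWritable/open interface as StorageObjectSeriesStream.

import time


def read_stream_with_retries(stream, writable, max_retries=4, sleep_s=0.1):
    """Hypothetical helper: read a series stream until it reports closed."""
    for _ in range(max_retries + 1):
        stream.ReadIntoWritable(writable)
        if not stream.open:
            return True  # The stream finished; all output was read.
        time.sleep(sleep_s)
    return False  # Gave up while the stream was still open.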
Example #2
  def Run(self, args):
    client = self.context['dataproc_client']
    messages = self.context['dataproc_messages']

    cluster_ref = util.ParseCluster(args.name, self.context)

    request = messages.DataprocProjectsRegionsClustersDiagnoseRequest(
        clusterName=cluster_ref.clusterName,
        region=cluster_ref.region,
        projectId=cluster_ref.projectId)

    try:
      operation = client.projects_regions_clusters.Diagnose(request)
      # TODO(user): Stream output during polling.
      operation = util.WaitForOperation(
          operation, self.context,
          message='Waiting for cluster diagnose operation')
      response = operation.response
    except apitools_exceptions.HttpError as error:
      raise exceptions.HttpException(util.FormatHttpError(error))

    if not response:
      raise exceptions.ToolException('Operation is missing response')

    properties = encoding.MessageToDict(response)
    output_uri = properties.get('outputUri')

    if not output_uri:
      raise exceptions.ToolException('Response is missing outputUri')

    log.err.Print('Output from diagnostic:')
    log.err.Print('-----------------------------------------------')
    driver_log_stream = storage_helpers.StorageObjectSeriesStream(
        output_uri)
    # A single read might not read the whole stream. Try a few times.
    read_retrier = retry.Retryer(max_retrials=4, jitter_ms=None)
    try:
      read_retrier.RetryOnResult(
          lambda: driver_log_stream.ReadIntoWritable(log.err),
          sleep_ms=100,
          should_retry_if=lambda *_: driver_log_stream.open)
    except retry.MaxRetrialsException:
      log.warn(
          'Diagnostic finished successfully, '
          'but output did not finish streaming.')
    log.err.Print('-----------------------------------------------')
    return output_uri
  def SetUp(self):
    self.mock_gcs_client = apitools_mock.Client(
        core_apis.GetClientClass('storage', 'v1'),
        real_client=core_apis.GetClientInstance('storage', 'v1', no_http=True))
    self.mock_gcs_client.Mock()
    self.addCleanup(self.mock_gcs_client.Unmock)
    self.mock_exec = self.StartPatch(
        'googlecloudsdk.core.execution_utils.Exec')
    self.mock_config_bin_path = self.StartPropertyPatch(
        config.Paths, 'sdk_bin_path')
    self.mock_config_bin_path.return_value = 'bin'

    self.storage_client = storage_helpers.StorageClient()
    self.storage_stream = storage_helpers.StorageObjectSeriesStream(
        self.BASE_GCS_PATH, self.storage_client)
    self.storage_messages = core_apis.GetMessagesModule('storage', 'v1')

    self.storage_api_client = storage_api.StorageClient()
    self.copy_file_mock = self.StartObjectPatch(self.storage_api_client,
                                                'CopyFileToGCS')
    self.storage_bucket = 'foo'
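A hypothetical test method built on this SetUp, showing how the patched CopyFileToGCS helper could be exercised; the method name, paths, and assertion are illustrative and not taken from the real test suite.

  def testCopyFileToGcsIsCalled(self):
    # All values here are illustrative; the patched method is a mock and
    # accepts any arguments.
    local_path = '/tmp/diagnostic.tar.gz'
    dest_path = 'gs://{0}/diagnostic.tar.gz'.format(self.storage_bucket)
    self.storage_api_client.CopyFileToGCS(local_path, dest_path)
    self.copy_file_mock.assert_called_once_with(local_path, dest_path)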
Example #4
    def _CheckStreamer(self, poll_result):
        """Checks whether a new output streamer needs to be initialized.

        A remote attempt may fail and switch to a new output URI. In that
        case the current streamer instance is invalidated and a new one is
        created for the new URI.

        Args:
          poll_result: Poll result returned from Poll.
        """

        # Mimic current job waiting behavior to print equal signs across the screen.
        def _PrintEqualsLineAccrossScreen():
            attr = console_attr.GetConsoleAttr()
            log.err.Print('=' * attr.GetTermSize()[0])

        # pylint: disable=assignment-from-none
        uri = self._GetOutputUri(poll_result)
        # pylint: enable=assignment-from-none

        if not uri:
            # Remote resource not ready, nothing to check.
            return

        # Invalidate current streamer if remote output uri changed.
        if self.saved_stream_uri and self.saved_stream_uri != uri:
            self.driver_log_streamer = None
            self.saved_stream_uri = None
            _PrintEqualsLineAccrossScreen()
            log.warning("Attempt failed. Streaming new attempt's output.")
            _PrintEqualsLineAccrossScreen()

        # Init a new streamer if there is no active streamer.
        if not self.driver_log_streamer:
            self.saved_stream_uri = uri
            self.driver_log_streamer = storage_helpers.StorageObjectSeriesStream(
                uri)
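For context, a rough sketch of how a polling loop might drive _CheckStreamer. Poll and IsDone are assumed methods of the same poller class (they are not shown above), and time/log are assumed to be imported as in the surrounding module.

    def _StreamOutputUntilDone(self, sleep_s=1.0):
        # Hypothetical driver loop: re-check the streamer on every poll and
        # read any new output before testing for completion.
        while True:
            poll_result = self.Poll()
            self._CheckStreamer(poll_result)
            streamer = self.driver_log_streamer
            if streamer and streamer.open:
                streamer.ReadIntoWritable(log.err)
            if self.IsDone(poll_result):
                break
            time.sleep(sleep_s)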
Example #5
    def Run(self, args):
        client = self.context['dataproc_client']
        messages = self.context['dataproc_messages']

        cluster_ref = util.ParseCluster(args.name, self.context)

        request = messages.DataprocProjectsClustersDiagnoseRequest(
            clusterName=cluster_ref.clusterName,
            projectId=cluster_ref.projectId)

        try:
            operation = client.projects_clusters.Diagnose(request)
            operation = util.WaitForOperation(
                operation,
                self.context,
                message='Waiting for cluster diagnose operation')
            response = operation.response
        except apitools_base.HttpError as error:
            raise exceptions.HttpException(util.FormatHttpError(error))

        if not response:
            raise exceptions.ToolException('Operation is missing response')

        properties = apitools_base.MessageToDict(response)
        output_uri = properties.get('outputUri')

        if not output_uri:
            raise exceptions.ToolException('Response is missing outputUri')

        log.err.Print('Output from diagnostic:')
        log.err.Print('-----------------------------------------------')
        driver_log_stream = storage_helpers.StorageObjectSeriesStream(
            output_uri)
        driver_log_stream.ReadIntoWritable(log.err)
        log.err.Print('-----------------------------------------------')
        return output_uri
Example #6
def WaitForJobTermination(job,
                          context,
                          message,
                          goal_state,
                          stream_driver_log=False,
                          log_poll_period_s=1,
                          dataproc_poll_period_s=10,
                          timeout_s=None):
    """Poll dataproc Job until its status is terminal or timeout reached.

  Args:
    job: The job to wait to finish.
    context: dict, dataproc Command context.
    message: str, message to display to user while polling.
    goal_state: JobStatus.StateValueValuesEnum, the state that defines success.
    stream_driver_log: bool, Whether to show the Job's driver's output.
    log_poll_period_s: number, delay in seconds between checking on the log.
    dataproc_poll_period_s: number, delay in seconds between requests to
        the Dataproc API.
    timeout_s: number, time out for job completion. None means no timeout.

  Returns:
    Job: the return value of the last successful jobs.get request.

  Raises:
    JobError: if the job finishes with an error or enters an unexpected
      terminal state.
    JobTimeoutError: if the job times out while polling.
  """
    client = context['dataproc_client']
    job_ref = ParseJob(job.reference.jobId, context)
    request = client.MESSAGES_MODULE.DataprocProjectsRegionsJobsGetRequest(
        projectId=job_ref.projectId,
        region=job_ref.region,
        jobId=job_ref.jobId)
    driver_log_stream = None
    last_job_poll_time = 0
    job_complete = False
    wait_display = None
    driver_output_uri = None

    def ReadDriverLogIfPresent():
        if driver_log_stream and driver_log_stream.open:
            # TODO(b/36049794): Don't read all output.
            driver_log_stream.ReadIntoWritable(log.err)

    def PrintEqualsLine():
        attr = console_attr.GetConsoleAttr()
        log.err.Print('=' * attr.GetTermSize()[0])

    if stream_driver_log:
        log.status.Print('Waiting for job output...')
        wait_display = NoOpProgressDisplay()
    else:
        wait_display = progress_tracker.ProgressTracker(message, autotick=True)
    start_time = now = time.time()
    with wait_display:
        while not timeout_s or timeout_s > (now - start_time):
            # Poll logs first to see if it closed.
            ReadDriverLogIfPresent()
            log_stream_closed = driver_log_stream and not driver_log_stream.open
            if not job_complete and job.status.state in constants.TERMINAL_JOB_STATES:
                job_complete = True
                # Wait 10s to get trailing output.
                timeout_s = now - start_time + 10

            if job_complete and (not stream_driver_log or log_stream_closed):
                # Nothing left to wait for
                break

            regular_job_poll = (
                not job_complete
                # Poll less frequently on dataproc API
                and now >= last_job_poll_time + dataproc_poll_period_s)
            # Poll at regular frequency before output has streamed and after it has
            # finished.
            expecting_output_stream = stream_driver_log and not driver_log_stream
            expecting_job_done = not job_complete and log_stream_closed
            if regular_job_poll or expecting_output_stream or expecting_job_done:
                last_job_poll_time = now
                try:
                    job = client.projects_regions_jobs.Get(request)
                except apitools_exceptions.HttpError as error:
                    log.warn('GetJob failed:\n{0}'.format(error))
                    # Keep trying until we timeout in case error is transient.
                if (stream_driver_log and job.driverOutputResourceUri
                        and job.driverOutputResourceUri != driver_output_uri):
                    if driver_output_uri:
                        PrintEqualsLine()
                        log.warn(
                            "Job attempt failed. Streaming new attempt's output."
                        )
                        PrintEqualsLine()
                    driver_output_uri = job.driverOutputResourceUri
                    driver_log_stream = storage_helpers.StorageObjectSeriesStream(
                        job.driverOutputResourceUri)
            time.sleep(log_poll_period_s)
            now = time.time()

    # TODO(b/34836493): Get better test coverage of the next 20 lines.
    state = job.status.state
    if state is not goal_state and job.status.details:
        # Just log details, because the state will be in the error message.
        log.info(job.status.details)

    if state in constants.TERMINAL_JOB_STATES:
        if stream_driver_log:
            if not driver_log_stream:
                log.warn('Expected job output not found.')
            elif driver_log_stream.open:
                log.warn(
                    'Job terminated, but output did not finish streaming.')
        if state is goal_state:
            return job
        raise exceptions.JobError(
            'Job [{0}] entered state [{1}] while waiting for [{2}].'.format(
                job_ref.jobId, state, goal_state))
    raise exceptions.JobTimeoutError(
        'Job [{0}] timed out while in state [{1}].'.format(
            job_ref.jobId, state))
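The subtlest part of this loop is deciding when to hit the Dataproc API again. Restated as a standalone, purely illustrative helper, the three conditions are:

def should_poll_job_api(job_complete, log_stream_closed, stream_driver_log,
                        driver_log_stream, now, last_job_poll_time,
                        dataproc_poll_period_s):
    """Illustrative restatement of the polling-cadence decision above."""
    # Poll the Dataproc API at the reduced rate while the job is still running.
    regular_job_poll = (not job_complete
                        and now >= last_job_poll_time + dataproc_poll_period_s)
    # Poll eagerly while waiting for the output stream to appear...
    expecting_output_stream = stream_driver_log and not driver_log_stream
    # ...and again once the stream has closed but the job is not yet done.
    expecting_job_done = not job_complete and log_stream_closed
    return regular_job_poll or expecting_output_stream or expecting_job_done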
Example #7
def WaitForJobTermination(dataproc,
                          job,
                          job_ref,
                          message,
                          goal_state,
                          error_state=None,
                          stream_driver_log=False,
                          log_poll_period_s=1,
                          dataproc_poll_period_s=10,
                          timeout_s=None):
    """Poll dataproc Job until its status is terminal or timeout reached.

  Args:
    dataproc: wrapper for dataproc resources, client and messages
    job: The job to wait to finish.
    job_ref: Parsed dataproc.projects.regions.jobs resource containing a
        projectId, region, and jobId.
    message: str, message to display to user while polling.
    goal_state: JobStatus.StateValueValuesEnum, the state that defines success.
    error_state: JobStatus.StateValueValuesEnum, the state that defines failure.
    stream_driver_log: bool, Whether to show the Job's driver's output.
    log_poll_period_s: number, delay in seconds between checking on the log.
    dataproc_poll_period_s: number, delay in seconds between requests to
        the Dataproc API.
    timeout_s: number, time out for job completion. None means no timeout.

  Returns:
    Job: the return value of the last successful jobs.get request.

  Raises:
    JobError: if the job finishes with an error or enters an unexpected
      terminal state.
    JobTimeoutError: if the job times out while polling.
  """
    request = dataproc.messages.DataprocProjectsRegionsJobsGetRequest(
        projectId=job_ref.projectId,
        region=job_ref.region,
        jobId=job_ref.jobId)
    driver_log_stream = None
    last_job_poll_time = 0
    job_complete = False
    wait_display = None
    driver_output_uri = None

    def ReadDriverLogIfPresent():
        if driver_log_stream and driver_log_stream.open:
            # TODO(b/36049794): Don't read all output.
            driver_log_stream.ReadIntoWritable(log.err)

    def PrintEqualsLine():
        attr = console_attr.GetConsoleAttr()
        log.err.Print('=' * attr.GetTermSize()[0])

    if stream_driver_log:
        log.status.Print('Waiting for job output...')
        wait_display = NoOpProgressDisplay()
    else:
        wait_display = progress_tracker.ProgressTracker(message, autotick=True)
    start_time = now = time.time()
    with wait_display:
        while not timeout_s or timeout_s > (now - start_time):
            # Poll logs first to see if it closed.
            ReadDriverLogIfPresent()
            log_stream_closed = driver_log_stream and not driver_log_stream.open
            if (not job_complete
                    and job.status.state in dataproc.terminal_job_states):
                job_complete = True
                # Wait 10s to get trailing output.
                timeout_s = now - start_time + 10

            if job_complete and (not stream_driver_log or log_stream_closed):
                # Nothing left to wait for
                break

            regular_job_poll = (
                not job_complete
                # Poll less frequently on dataproc API
                and now >= last_job_poll_time + dataproc_poll_period_s)
            # Poll at regular frequency before output has streamed and after it has
            # finished.
            expecting_output_stream = stream_driver_log and not driver_log_stream
            expecting_job_done = not job_complete and log_stream_closed
            if regular_job_poll or expecting_output_stream or expecting_job_done:
                last_job_poll_time = now
                try:
                    job = dataproc.client.projects_regions_jobs.Get(request)
                except apitools_exceptions.HttpError as error:
                    log.warning('GetJob failed:\n{}'.format(
                        six.text_type(error)))
                    # Do not retry on 4xx errors.
                    if IsClientHttpException(error):
                        raise
                if (stream_driver_log and job.driverOutputResourceUri
                        and job.driverOutputResourceUri != driver_output_uri):
                    if driver_output_uri:
                        PrintEqualsLine()
                        log.warning(
                            "Job attempt failed. Streaming new attempt's output."
                        )
                        PrintEqualsLine()
                    driver_output_uri = job.driverOutputResourceUri
                    driver_log_stream = storage_helpers.StorageObjectSeriesStream(
                        job.driverOutputResourceUri)
            time.sleep(log_poll_period_s)
            now = time.time()

    # TODO(b/34836493): Get better test coverage of the next 20 lines.
    state = job.status.state

    # goal_state and error_state will always be terminal
    if state in dataproc.terminal_job_states:
        if stream_driver_log:
            if not driver_log_stream:
                log.warning('Expected job output not found.')
            elif driver_log_stream.open:
                log.warning(
                    'Job terminated, but output did not finish streaming.')
        if state is goal_state:
            return job
        if error_state and state is error_state:
            if job.status.details:
                raise exceptions.JobError(
                    'Job [{0}] failed with error:\n{1}'.format(
                        job_ref.jobId, job.status.details))
            raise exceptions.JobError('Job [{0}] failed.'.format(
                job_ref.jobId))
        if job.status.details:
            log.info('Details:\n' + job.status.details)
        raise exceptions.JobError(
            'Job [{0}] entered state [{1}] while waiting for [{2}].'.format(
                job_ref.jobId, state, goal_state))
    raise exceptions.JobTimeoutError(
        'Job [{0}] timed out while in state [{1}].'.format(
            job_ref.jobId, state))
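A minimal sketch of how a command might call this newer form of WaitForJobTermination, assuming dataproc, job, and job_ref were obtained as in the other examples; DONE and ERROR are standard Dataproc JobStatus states.

job = WaitForJobTermination(
    dataproc,
    job,
    job_ref,
    message='Waiting for job completion',
    goal_state=dataproc.messages.JobStatus.StateValueValuesEnum.DONE,
    error_state=dataproc.messages.JobStatus.StateValueValuesEnum.ERROR,
    stream_driver_log=True)
log.status.Print('Job [{0}] finished successfully.'.format(job_ref.jobId))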