Exemple #1
0
    def Run(self, args):
        """Submit a job to the cluster named in ``args`` and optionally wait.

        Called when the user runs this command.

        Args:
          args: Parsed command arguments. This method reads ``args.id``,
            ``args.cluster`` and the ``async`` flag directly; ``args`` is also
            passed to ``PopulateFilesByType`` and ``ConfigureJob``.

        Returns:
          The job returned by the submit call, or, when the ``async`` flag is
          not set, the job returned by ``util.WaitForJobTermination``.

        Raises:
          exceptions.HttpException: If fetching the cluster or submitting the
            job raises an apitools ``HttpError``.
        """
        client = self.context['dataproc_client']
        messages = self.context['dataproc_messages']

        job_id = util.GetJobId(args.id)
        job_ref = util.ParseJob(job_id, self.context)

        self.PopulateFilesByType(args)

        cluster_ref = util.ParseCluster(args.cluster, self.context)
        request = cluster_ref.Request()

        try:
            cluster = client.projects_regions_clusters.Get(request)
        except apitools_exceptions.HttpError as error:
            raise exceptions.HttpException(util.FormatHttpError(error))

        # The staging directory is derived from the fetched cluster and must
        # be set before files are validated and staged.
        self._staging_dir = self.GetStagingDir(cluster)
        self.ValidateAndStageFiles()

        job = messages.Job(
            reference=messages.JobReference(projectId=job_ref.projectId,
                                            jobId=job_ref.jobId),
            placement=messages.JobPlacement(clusterName=args.cluster))

        self.ConfigureJob(job, args)

        request = messages.DataprocProjectsRegionsJobsSubmitRequest(
            projectId=job_ref.projectId,
            region=job_ref.region,
            submitJobRequest=messages.SubmitJobRequest(job=job))

        try:
            job = client.projects_regions_jobs.Submit(request)
        except apitools_exceptions.HttpError as error:
            raise exceptions.HttpException(util.FormatHttpError(error))

        log.status.Print('Job [{0}] submitted.'.format(job_id))

        # `async` is a reserved keyword from Python 3.7 on, so the attribute
        # cannot be spelled `args.async`; getattr reads the same attribute and
        # parses on every Python version.
        if not getattr(args, 'async'):
            job = util.WaitForJobTermination(
                job,
                self.context,
                message='Waiting for job completion',
                goal_state=messages.JobStatus.StateValueValuesEnum.DONE,
                stream_driver_log=True)
            log.status.Print('Job [{0}] finished successfully.'.format(job_id))

        return job
  def Run(self, args):
    """Run a cluster diagnose operation and print its output to ``log.err``.

    Args:
      args: Parsed command arguments; ``args.name`` identifies the cluster.

    Returns:
      The ``outputUri`` value taken from the diagnose operation's response.

    Raises:
      exceptions.HttpException: If the diagnose call or the wait on its
        operation raises an apitools ``HttpError``.
      exceptions.ToolException: If the finished operation has no response, or
        the response has no (or an empty) ``outputUri``.
    """
    client = self.context['dataproc_client']
    messages = self.context['dataproc_messages']

    cluster_ref = util.ParseCluster(args.name, self.context)

    request = messages.DataprocProjectsRegionsClustersDiagnoseRequest(
        clusterName=cluster_ref.clusterName,
        region=cluster_ref.region,
        projectId=cluster_ref.projectId)

    try:
      operation = client.projects_regions_clusters.Diagnose(request)
      # TODO(user): Stream output during polling.
      operation = util.WaitForOperation(
          operation, self.context,
          message='Waiting for cluster diagnose operation')
      response = operation.response
    except apitools_exceptions.HttpError as error:
      raise exceptions.HttpException(util.FormatHttpError(error))

    if not response:
      raise exceptions.ToolException('Operation is missing response')

    properties = encoding.MessageToDict(response)
    # Use .get() so a response without the key reaches the ToolException
    # below instead of escaping as a bare KeyError.
    output_uri = properties.get('outputUri')

    if not output_uri:
      raise exceptions.ToolException('Response is missing outputUri')

    log.err.Print('Output from diagnostic:')
    log.err.Print('-----------------------------------------------')
    driver_log_stream = storage_helpers.StorageObjectSeriesStream(
        output_uri)
    # A single read might not read whole stream. Try a few times.
    read_retrier = retry.Retryer(max_retrials=4, jitter_ms=None)
    try:
      # Retry for as long as the stream still reports itself as open.
      read_retrier.RetryOnResult(
          lambda: driver_log_stream.ReadIntoWritable(log.err),
          sleep_ms=100,
          should_retry_if=lambda *_: driver_log_stream.open)
    except retry.MaxRetrialsException:
      # Best effort: the diagnose operation itself succeeded, so only warn.
      log.warn(
          'Diagnostic finished succesfully, '
          'but output did not finish streaming.')
    log.err.Print('-----------------------------------------------')
    return output_uri
Exemple #3
0
    def Run(self, args):
        """Run a cluster diagnose operation and print its output to ``log.err``.

        Args:
          args: Parsed command arguments; ``args.name`` identifies the
            cluster.

        Returns:
          The ``outputUri`` value taken from the diagnose operation's
          response.

        Raises:
          exceptions.HttpException: If the diagnose call or the wait on its
            operation raises an apitools ``HttpError``.
          exceptions.ToolException: If the finished operation has no
            response, or the response has no (or an empty) ``outputUri``.
        """
        client = self.context['dataproc_client']
        messages = self.context['dataproc_messages']

        cluster_ref = util.ParseCluster(args.name, self.context)

        request = messages.DataprocProjectsClustersDiagnoseRequest(
            clusterName=cluster_ref.clusterName,
            projectId=cluster_ref.projectId)

        try:
            operation = client.projects_clusters.Diagnose(request)
            operation = util.WaitForOperation(
                operation,
                self.context,
                message='Waiting for cluster diagnose operation')
            response = operation.response
        except apitools_base.HttpError as error:
            raise exceptions.HttpException(util.FormatHttpError(error))

        if not response:
            raise exceptions.ToolException('Operation is missing response')

        properties = apitools_base.MessageToDict(response)
        # Use .get() so a response without the key reaches the ToolException
        # below instead of escaping as a bare KeyError.
        output_uri = properties.get('outputUri')

        if not output_uri:
            raise exceptions.ToolException('Response is missing outputUri')

        log.err.Print('Output from diagnostic:')
        log.err.Print('-----------------------------------------------')
        driver_log_stream = storage_helpers.StorageObjectSeriesStream(
            output_uri)
        driver_log_stream.ReadIntoWritable(log.err)
        log.err.Print('-----------------------------------------------')
        return output_uri