Example #1
  def _Create(self):
    """Creates the disk."""
    cmd = util.GcloudCommand(self, 'compute', 'disks', 'create', self.name)
    cmd.flags['size'] = self.disk_size
    cmd.flags['type'] = self.disk_type
    cmd.flags['labels'] = util.MakeFormattedDefaultLabels()
    if self.image:
      cmd.flags['image'] = self.image
    if self.image_project:
      cmd.flags['image-project'] = self.image_project
    _, stderr, retcode = cmd.Issue()
    # Check stderr for known gcloud failure patterns and surface them as
    # specific errors rather than a generic command failure.
    util.CheckGcloudResponseKnownFailures(stderr, retcode)
Example #2
  def _Create(self):
    """Creates the cluster."""
    if self.min_cpu_platform or self.gpu_count:
      cmd = util.GcloudCommand(
          self, 'beta', 'container', 'clusters', 'create', self.name)
    else:
      cmd = util.GcloudCommand(
          self, 'container', 'clusters', 'create', self.name)

    cmd.flags['cluster-version'] = self.cluster_version
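    # Alpha clusters are created without node auto-repair or auto-upgrade.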
    if FLAGS.gke_enable_alpha:
      cmd.args.append('--enable-kubernetes-alpha')
      cmd.args.append('--no-enable-autorepair')
      cmd.args.append('--no-enable-autoupgrade')

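    # If the active gcloud identity is a service account, run the nodes as that
    # account; otherwise fall back to the broad cloud-platform OAuth scope.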
    user = util.GetDefaultUser()
    if 'gserviceaccount.com' in user:
      cmd.flags['service-account'] = user
      self.use_application_default_credentials = False
    else:
      cmd.flags['scopes'] = 'cloud-platform'

    if self.gpu_count:
      cmd.flags['accelerator'] = (gce_virtual_machine.
                                  GenerateAcceleratorSpecString(self.gpu_type,
                                                                self.gpu_count))
    if self.min_cpu_platform:
      cmd.flags['min-cpu-platform'] = self.min_cpu_platform

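    # Enable cluster autoscaling only when the node count is allowed to vary.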
    if self.min_nodes != self.num_nodes or self.max_nodes != self.num_nodes:
      cmd.args.append('--enable-autoscaling')
      cmd.flags['max-nodes'] = self.max_nodes
      cmd.flags['min-nodes'] = self.min_nodes

    cmd.flags['num-nodes'] = self.num_nodes

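    # Without an explicit machine type, request a custom machine type built
    # from the desired vCPU count and memory.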
    if self.machine_type is None:
      cmd.flags['machine-type'] = 'custom-{0}-{1}'.format(
          self.cpus, self.memory)
    else:
      cmd.flags['machine-type'] = self.machine_type

    cmd.flags['metadata'] = util.MakeFormattedDefaultTags()
    cmd.flags['labels'] = util.MakeFormattedDefaultLabels()

    # This command needs a long timeout due to the many minutes it
    # can take to provision a large GPU-accelerated GKE cluster.
    cmd.Issue(timeout=900, env=self._GetRequiredGkeEnv())
Example #3
    def AddMetadata(self, **kwargs):
        """Adds metadata to the VM and labels to its disk."""
        if not kwargs:
            return
        cmd = util.GcloudCommand(self, 'compute', 'instances', 'add-metadata',
                                 self.name)
        # kwargs is guaranteed non-empty here, so always append the caller's
        # tags to the default metadata.
        cmd.flags['metadata'] = '{metadata},{kwargs}'.format(
            metadata=util.MakeFormattedDefaultTags(),
            kwargs=util.FormatTags(kwargs))
        cmd.Issue()

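        # Default labels go on the disk of the same name via a separate
        # add-labels call, since add-metadata only updates instance metadata.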
        cmd = util.GcloudCommand(
            self, 'compute', 'disks', 'add-labels',
            '--labels={}'.format(util.MakeFormattedDefaultLabels()), self.name)
        cmd.Issue()
Example #4
    def SubmitJob(self,
                  jarfile,
                  classname,
                  job_poll_interval=None,
                  job_arguments=None,
                  job_stdout_file=None,
                  job_type=None):
        """See base class."""
        cmd = util.GcloudCommand(self, 'dataproc', 'jobs', 'submit', job_type)
        cmd.flags['cluster'] = self.cluster_id
        cmd.flags['labels'] = util.MakeFormattedDefaultLabels()

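        # With an explicit main class, pass the jar on --jars and the entry
        # point on --class; otherwise run the jar's own main class via --jar.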
        if classname:
            cmd.flags['jars'] = jarfile
            cmd.flags['class'] = classname
        else:
            cmd.flags['jar'] = jarfile

        self.append_region(cmd)

        # Dataproc prints an object describing job execution on stdout, while
        # its stderr mixes the job's stderr with the job's stdout.  We set the
        # driver log level to suppress driver log messages so that the job's
        # standard output can (hopefully) be separated from them.
        cmd.flags['driver-log-levels'] = 'root={}'.format(FLAGS.dpb_log_level)

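        # Everything after '--' is passed through to the job rather than
        # interpreted as a gcloud flag.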
        if job_arguments:
            cmd.additional_flags = ['--'] + job_arguments

        stdout, stderr, retcode = cmd.Issue(timeout=None)
        if retcode != 0:
            return {dpb_service.SUCCESS: False}

        stats = self._GetStats(stdout)
        return stats
Example #5
    def SubmitJob(self,
                  jarfile,
                  classname,
                  job_script=None,
                  job_poll_interval=None,
                  job_arguments=None,
                  job_stdout_file=None,
                  job_type=spark_service.SPARK_JOB_TYPE):
        cmd = util.GcloudCommand(self, 'dataproc', 'jobs', 'submit', job_type)
        cmd.flags['cluster'] = self.cluster_id
        cmd.flags['labels'] = util.MakeFormattedDefaultLabels()
        # If we don't clear it here, the zone flag is automatically added to
        # the command, which breaks dataproc jobs submit.
        cmd.flags['zone'] = []

        cmd.additional_flags = []
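        # Pick what to run: a jar with an explicit main class, a self-contained
        # jar, or a script passed as a positional argument.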
        if classname and jarfile:
            cmd.flags['jars'] = jarfile
            cmd.flags['class'] = classname
        elif jarfile:
            cmd.flags['jar'] = jarfile
        elif job_script:
            cmd.additional_flags += [job_script]

        # Dataproc prints an object describing job execution on stdout, while
        # its stderr mixes the job's stderr with the job's stdout.  We set the
        # driver log level to suppress driver log messages so that the job's
        # standard output can (hopefully) be separated from them.
        cmd.flags['driver-log-levels'] = 'root={}'.format(
            FLAGS.spark_service_log_level)
        if job_arguments:
            cmd.additional_flags += ['--'] + job_arguments
        stdout, stderr, retcode = cmd.Issue(timeout=None)
        if retcode != 0:
            return {spark_service.SUCCESS: False}

        stats = self._GetStats(stdout)
        stats[spark_service.SUCCESS] = True

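        # Recover the job's stdout from Dataproc's stderr stream by stripping
        # the submission banner and the Spark stage-progress lines.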
        if job_stdout_file:
            with open(job_stdout_file, 'w') as f:
                lines = stderr.splitlines(True)
                if (not re.match(r'Job \[.*\] submitted.', lines[0]) or
                        not re.match(r'Waiting for job output...', lines[1])):
                    raise Exception('Dataproc output in unexpected format.')
                i = 2
                if job_type == spark_service.SPARK_JOB_TYPE:
                    if not re.match(r'\r', lines[i]):
                        raise Exception(
                            'Dataproc output in unexpected format.')
                    i += 1
                    # Eat these status lines.  They end in \r, so they overwrite
                    # themselves at the console or when you cat a file.  But they
                    # are part of this string.
                    while re.match(r'\[Stage \d+:', lines[i]):
                        i += 1
                    if not re.match(r' *\r$', lines[i]):
                        raise Exception(
                            'Dataproc output in unexpected format.')

                while i < len(lines) and not re.match(r'Job \[.*\]', lines[i]):
                    f.write(lines[i])
                    i += 1
                if i != len(lines) - 1:
                    raise Exception('Dataproc output in unexpected format.')
        return stats