Example #1
0
 def Args(cls, parser):
     dataproc = dp.Dataproc(cls.ReleaseTrack())
     flags.AddAutoscalingPolicyResourceArg(parser, 'export',
                                           dataproc.api_version)
     export_util.AddExportFlags(parser)
Example #2
0
  def Run(self, args):
    dataproc = dp.Dataproc(self.ReleaseTrack())

    cluster_ref = util.ParseCluster(args.name, dataproc)

    cluster_config = dataproc.messages.ClusterConfig()
    changed_fields = []

    has_changes = False

    if args.num_workers is not None:
      worker_config = dataproc.messages.InstanceGroupConfig(
          numInstances=args.num_workers)
      cluster_config.workerConfig = worker_config
      changed_fields.append('config.worker_config.num_instances')
      has_changes = True

    if args.num_preemptible_workers is not None:
      worker_config = dataproc.messages.InstanceGroupConfig(
          numInstances=args.num_preemptible_workers)
      cluster_config.secondaryWorkerConfig = worker_config
      changed_fields.append(
          'config.secondary_worker_config.num_instances')
      has_changes = True

    # Update labels if the user requested it
    labels = None
    if args.update_labels or args.remove_labels:
      has_changes = True
      changed_fields.append('labels')

      # We need to fetch the cluster first so we know what the current labels
      # look like. labels_util.UpdateLabels will fill out the proto with all
      # the updates and removals, but it needs the current state of the labels
      # as input.
      get_cluster_request = (
          dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
              projectId=cluster_ref.projectId,
              region=cluster_ref.region,
              clusterName=cluster_ref.clusterName))
      current_cluster = dataproc.client.projects_regions_clusters.Get(
          get_cluster_request)
      labels = labels_util.UpdateLabels(
          current_cluster.labels,
          dataproc.messages.Cluster.LabelsValue,
          args.update_labels,
          args.remove_labels)

    if not has_changes:
      raise exceptions.ArgumentError(
          'Must specify at least one cluster parameter to update.')

    cluster = dataproc.messages.Cluster(
        config=cluster_config,
        clusterName=cluster_ref.clusterName,
        labels=labels,
        projectId=cluster_ref.projectId)

    request = dataproc.messages.DataprocProjectsRegionsClustersPatchRequest(
        clusterName=cluster_ref.clusterName,
        region=cluster_ref.region,
        projectId=cluster_ref.projectId,
        cluster=cluster,
        updateMask=','.join(changed_fields))

    if (self.ReleaseTrack() == base.ReleaseTrack.BETA and
        args.graceful_decommission_timeout):
      request.gracefulDecommissionTimeout = (
          str(args.graceful_decommission_timeout) + 's')

    operation = dataproc.client.projects_regions_clusters.Patch(request)

    if args.async:
      log.status.write(
          'Updating [{0}] with operation [{1}].'.format(
              cluster_ref, operation.name))
      return

    util.WaitForOperation(
        dataproc,
        operation,
        message='Waiting for cluster update operation',
        timeout_s=args.timeout)

    request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
        projectId=cluster_ref.projectId,
        region=cluster_ref.region,
        clusterName=cluster_ref.clusterName)
    cluster = dataproc.client.projects_regions_clusters.Get(request)
    log.UpdatedResource(cluster_ref)
    return cluster
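Note: the Run method above only populates the Cluster fields it intends to change and lists the corresponding paths in a comma-separated updateMask; the mask paths use snake_case even though the generated proto attributes are camelCase. A minimal sketch of that field-mask pattern in plain Python, with hypothetical values and no gcloud dependencies:

num_workers = 10             # e.g. parsed from --num-workers
num_preemptible_workers = 4  # e.g. parsed from --num-preemptible-workers

changed_fields = []
if num_workers is not None:
    changed_fields.append('config.worker_config.num_instances')
if num_preemptible_workers is not None:
    changed_fields.append('config.secondary_worker_config.num_instances')

update_mask = ','.join(changed_fields)
# 'config.worker_config.num_instances,config.secondary_worker_config.num_instances'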
Example #3
0
 def Args(cls, parser):
   dataproc = dp.Dataproc(cls.ReleaseTrack())
   flags.AddJobResourceArg(parser, 'kill', dataproc.api_version)
Example #4
0
 def Args(cls, parser):
   dataproc = dp.Dataproc(cls.ReleaseTrack())
   flags.AddOperationResourceArg(parser, 'cancel', dataproc.api_version)
Example #5
0
    def Run(self, args):
        dataproc = dp.Dataproc(self.ReleaseTrack())

        cluster_ref = util.ParseCluster(args.name, dataproc)

        cluster_config = dataproc.messages.ClusterConfig()
        changed_fields = []

        has_changes = False

        if args.num_workers is not None:
            worker_config = dataproc.messages.InstanceGroupConfig(
                numInstances=args.num_workers)
            cluster_config.workerConfig = worker_config
            changed_fields.append('config.worker_config.num_instances')
            has_changes = True

        if args.num_preemptible_workers is not None:
            worker_config = dataproc.messages.InstanceGroupConfig(
                numInstances=args.num_preemptible_workers)
            cluster_config.secondaryWorkerConfig = worker_config
            changed_fields.append(
                'config.secondary_worker_config.num_instances')
            has_changes = True

        if self.ReleaseTrack() == base.ReleaseTrack.BETA:
            lifecycle_config = dataproc.messages.LifecycleConfig()
            changed_config = False
            if args.max_age is not None:
                lifecycle_config.autoDeleteTtl = str(args.max_age) + 's'
                changed_fields.append(
                    'config.lifecycle_config.auto_delete_ttl')
                changed_config = True
            if args.expiration_time is not None:
                lifecycle_config.autoDeleteTime = times.FormatDateTime(
                    args.expiration_time)
                changed_fields.append(
                    'config.lifecycle_config.auto_delete_time')
                changed_config = True
            if args.max_idle is not None:
                lifecycle_config.idleDeleteTtl = str(args.max_idle) + 's'
                changed_fields.append(
                    'config.lifecycle_config.idle_delete_ttl')
                changed_config = True
            if args.no_max_age:
                lifecycle_config.autoDeleteTtl = None
                changed_fields.append(
                    'config.lifecycle_config.auto_delete_ttl')
                changed_config = True
            if args.no_max_idle:
                lifecycle_config.idleDeleteTtl = None
                changed_fields.append(
                    'config.lifecycle_config.idle_delete_ttl')
                changed_config = True
            if changed_config:
                cluster_config.lifecycleConfig = lifecycle_config
                has_changes = True

        # Put in a thunk so we only make this call if needed
        def _GetCurrentLabels():
            # We need to fetch the cluster first so we know what the current
            # labels look like. labels_util will fill out the proto with all
            # the updates and removals, but it needs the current state of the
            # labels as input.
            get_cluster_request = (
                dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
                    projectId=cluster_ref.projectId,
                    region=cluster_ref.region,
                    clusterName=cluster_ref.clusterName))
            current_cluster = dataproc.client.projects_regions_clusters.Get(
                get_cluster_request)
            return current_cluster.labels

        labels_update = labels_util.ProcessUpdateArgsLazy(
            args,
            dataproc.messages.Cluster.LabelsValue,
            orig_labels_thunk=_GetCurrentLabels)
        if labels_update.needs_update:
            has_changes = True
            changed_fields.append('labels')
        labels = labels_update.GetOrNone()

        if not has_changes:
            raise exceptions.ArgumentError(
                'Must specify at least one cluster parameter to update.')

        cluster = dataproc.messages.Cluster(
            config=cluster_config,
            clusterName=cluster_ref.clusterName,
            labels=labels,
            projectId=cluster_ref.projectId)

        request = dataproc.messages.DataprocProjectsRegionsClustersPatchRequest(
            clusterName=cluster_ref.clusterName,
            region=cluster_ref.region,
            projectId=cluster_ref.projectId,
            cluster=cluster,
            updateMask=','.join(changed_fields),
            requestId=util.GetUniqueId())

        if args.graceful_decommission_timeout is not None:
            request.gracefulDecommissionTimeout = (
                str(args.graceful_decommission_timeout) + 's')

        operation = dataproc.client.projects_regions_clusters.Patch(request)

        if args.async:
            log.status.write('Updating [{0}] with operation [{1}].'.format(
                cluster_ref, operation.name))
            return

        util.WaitForOperation(dataproc,
                              operation,
                              message='Waiting for cluster update operation',
                              timeout_s=args.timeout)

        request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
            projectId=cluster_ref.projectId,
            region=cluster_ref.region,
            clusterName=cluster_ref.clusterName)
        cluster = dataproc.client.projects_regions_clusters.Get(request)
        log.UpdatedResource(cluster_ref)
        return cluster
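Note: the Duration-valued flags consumed above (--max-age, --max-idle, --graceful-decommission-timeout) are serialized for the API as a seconds string, e.g. str(args.max_age) + 's'. A one-line sketch with a hypothetical value:

max_age_seconds = 3600                        # e.g. --max-age=1h
auto_delete_ttl = str(max_age_seconds) + 's'  # '3600s', as assigned to autoDeleteTtl above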
Example #6
0
 def Args(parser):
   dataproc = dp.Dataproc(base.ReleaseTrack.GA)
   flags.AddBatchResourceArg(parser, 'wait', dataproc.api_version)
Example #7
0
 def Args(cls, parser):
     dataproc = dp.Dataproc(cls.ReleaseTrack())
     labels_util.AddCreateLabelsFlags(parser)
     flags.AddTemplateResourceArg(parser, 'create', dataproc.api_version)
Example #8
0
 def Args(cls, parser):
     dataproc = dp.Dataproc(cls.ReleaseTrack())
     flags.AddAutoscalingPolicyResourceArg(parser,
                                           'retrieve the IAM policy for',
                                           api_version=dataproc.api_version)
Example #9
0
 def Args(parser):
     dataproc = dp.Dataproc()
     flags.AddBatchResourceArg(parser, 'describe', dataproc.api_version)
Example #10
0
 def GetApiVersion(cls):
     """Returns the API version based on the release track."""
     return dp.Dataproc(cls.ReleaseTrack()).api_version
Example #11
0
 def Args(cls, parser):
     dataproc = dp.Dataproc(cls.ReleaseTrack())
     flags.AddImportArgs(parser, 'import', dataproc.api_version,
                         'AutoscalingPolicy')
Example #12
0
    def Run(self, args):
        message = (
            'A personal authentication session will propagate your personal '
            'credentials to the cluster, so make sure you trust the cluster '
            'and the user who created it.')
        console_io.PromptContinue(
            message=message,
            cancel_on_no=True,
            cancel_string='Enabling session aborted by user')
        dataproc = dp.Dataproc(self.ReleaseTrack())

        cluster_ref = args.CONCEPTS.cluster.Parse()
        project = cluster_ref.projectId
        region = cluster_ref.region
        cluster_name = cluster_ref.clusterName
        get_request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
            projectId=project, region=region, clusterName=cluster_name)
        cluster = dataproc.client.projects_regions_clusters.Get(get_request)
        cluster_uuid = cluster.clusterUuid

        if args.access_boundary:
            with files.FileReader(args.access_boundary) as abf:
                access_boundary_json = abf.read()
        else:
            access_boundary_json = flags.ProjectGcsObjectsAccessBoundary(
                project)

        # ECIES keys should be used by default. If tink libraries are absent
        # from the system, then fall back to using RSA keys.
        cluster_key_type = 'ECIES' if self.is_tink_library_installed(
        ) else 'RSA'

        cluster_key = None
        if cluster_key_type == 'ECIES':
            # Try to fetch ECIES keys from the cluster control plane node's
            # metadata. If ECIES keys are not available, fall back to RSA keys
            # here as well.
            cluster_key = clusters.ClusterKey(cluster, cluster_key_type)
            if not cluster_key:
                cluster_key_type = 'RSA'

        openssl_executable = None
        if cluster_key_type == 'RSA':
            cluster_key = clusters.ClusterKey(cluster, cluster_key_type)
            openssl_executable = args.openssl_command
            if not openssl_executable:
                try:
                    openssl_executable = files.FindExecutableOnPath('openssl')
                except ValueError:
                    log.fatal(
                        'Could not find openssl on your system. The enable-session '
                        'command requires openssl to be installed.')

        operation_poller = waiter.CloudOperationPollerNoResources(
            dataproc.client.projects_regions_operations,
            lambda operation: operation.name)
        try:
            if not cluster_key:
                raise exceptions.PersonalAuthError(
                    'The cluster {} does not support personal auth.'.format(
                        cluster_name))

            with progress_tracker.ProgressTracker(
                    'Injecting initial credentials into the cluster {}'.format(
                        cluster_name),
                    autotick=True):
                self.inject_credentials(dataproc, project, region,
                                        cluster_name, cluster_uuid,
                                        cluster_key, access_boundary_json,
                                        operation_poller, openssl_executable)

            if not args.refresh_credentials:
                return

            update_message = (
                'Periodically refreshing credentials for cluster {}. This'
                ' will continue running until the command is interrupted'
            ).format(cluster_name)

            with progress_tracker.ProgressTracker(update_message,
                                                  autotick=True):
                try:
                    # Cluster keys are periodically regenerated, so fetch the latest
                    # each time we inject credentials.
                    cluster = dataproc.client.projects_regions_clusters.Get(
                        get_request)
                    cluster_key = clusters.ClusterKey(cluster,
                                                      cluster_key_type)
                    if not cluster_key:
                        raise exceptions.PersonalAuthError(
                            'The cluster {} does not support personal auth.'.
                            format(cluster_name))

                    failure_count = 0
                    while failure_count < 3:
                        try:
                            time.sleep(30)
                            self.inject_credentials(dataproc, project, region,
                                                    cluster_name, cluster_uuid,
                                                    cluster_key,
                                                    access_boundary_json,
                                                    operation_poller,
                                                    openssl_executable)
                            failure_count = 0
                        except ValueError as err:
                            log.error(err)
                            failure_count += 1
                    raise exceptions.PersonalAuthError(
                        'Credential injection failed three times in a row, giving up...'
                    )
                except (console_io.OperationCancelledError, KeyboardInterrupt):
                    return
        except exceptions.PersonalAuthError as err:
            log.error(err)
            return
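Note: the refresh loop above keeps re-injecting credentials every 30 seconds, resets its failure counter on each success, and gives up only after three consecutive failures. A self-contained sketch of that retry shape; the inject callable is a hypothetical stand-in for self.inject_credentials(...):

import time

def refresh_credentials_loop(inject, interval_s=30):
    # Runs until interrupted or until three consecutive injection failures.
    failure_count = 0
    while failure_count < 3:
        try:
            time.sleep(interval_s)
            inject()
            failure_count = 0  # a success resets the consecutive-failure count
        except ValueError as err:
            print(err)
            failure_count += 1
    raise RuntimeError(
        'Credential injection failed three times in a row, giving up...')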
Example #13
0
 def Args(cls, parser):
     dataproc = dp.Dataproc(cls.ReleaseTrack())
     flags.AddTimeoutFlag(parser, default='35m')
     base.ASYNC_FLAG.AddToParser(parser)
     flags.AddParametersFlag(parser)
     flags.AddTemplateResourceArg(parser, 'run', dataproc.api_version)
Example #14
0
 def SetUp(self):
     self.dataproc_mock = dp.Dataproc(self.track)
     self.dataproc_mock._client = self.mock_client
     self.dataproc_mock._messages = self.messages
Example #15
0
    def Run(self, args):
        dataproc = dp.Dataproc(self.ReleaseTrack())

        cluster_ref = args.CONCEPTS.cluster.Parse()
        project = cluster_ref.projectId
        region = cluster_ref.region
        cluster_name = cluster_ref.clusterName
        get_request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
            projectId=project, region=region, clusterName=cluster_name)
        cluster = dataproc.client.projects_regions_clusters.Get(get_request)
        cluster_uuid = cluster.clusterUuid

        if args.access_boundary:
            with files.FileReader(args.access_boundary, mode='r') as abf:
                access_boundary_json = abf.read()
        else:
            access_boundary_json = flags.ProjectGcsObjectsAccessBoundary(
                project)

        openssl_executable = args.openssl_command
        if not openssl_executable:
            try:
                openssl_executable = files.FindExecutableOnPath('openssl')
            except ValueError:
                log.fatal(
                    'Could not find openssl on your system. The enable-session '
                    'command requires openssl to be installed.')

        operation_poller = waiter.CloudOperationPollerNoResources(
            dataproc.client.projects_regions_operations,
            lambda operation: operation.name)
        try:
            cluster_key = clusters.ClusterKey(cluster)
            if not cluster_key:
                raise exceptions.PersonalAuthError(
                    'The cluster {} does not support personal auth.'.format(
                        cluster_name))

            with progress_tracker.ProgressTracker(
                    'Injecting initial credentials into the cluster {}'.format(
                        cluster_name),
                    autotick=True):
                self.inject_credentials(dataproc, project, region,
                                        cluster_name, cluster_uuid,
                                        cluster_key, access_boundary_json,
                                        openssl_executable, operation_poller)

            if not args.refresh_credentials:
                return

            update_message = (
                'Periodically refreshing credentials for cluster {}. This'
                ' will continue running until the command is interrupted'
            ).format(cluster_name)

            with progress_tracker.ProgressTracker(update_message,
                                                  autotick=True):
                try:
                    # Cluster keys are periodically regenerated, so fetch the latest
                    # each time we inject credentials.
                    cluster = dataproc.client.projects_regions_clusters.Get(
                        get_request)
                    cluster_key = clusters.ClusterKey(cluster)
                    if not cluster_key:
                        raise exceptions.PersonalAuthError(
                            'The cluster {} does not support personal auth.'.
                            format(cluster_name))

                    failure_count = 0
                    while failure_count < 3:
                        try:
                            time.sleep(30)
                            self.inject_credentials(dataproc, project, region,
                                                    cluster_name, cluster_uuid,
                                                    cluster_key,
                                                    access_boundary_json,
                                                    openssl_executable,
                                                    operation_poller)
                            failure_count = 0
                        except ValueError as err:
                            log.error(err)
                            failure_count += 1
                    raise exceptions.PersonalAuthError(
                        'Credential injection failed three times in a row, giving up...'
                    )
                except (console_io.OperationCancelledError, KeyboardInterrupt):
                    return
        except exceptions.PersonalAuthError as err:
            log.error(err)
            return
Example #16
0
  def Run(self, args):
    dataproc = dp.Dataproc(base.ReleaseTrack.GA)
    sparkr_batch = sparkr_batch_factory.SparkRBatchFactory(
        dataproc).UploadLocalFilesAndGetMessage(args)

    return batch_submitter.Submit(sparkr_batch, dataproc, args)
Example #17
0
 def Args(cls, parser):
     pig.PigBase.Args(parser)
     dataproc = dp.Dataproc(cls.ReleaseTrack())
     workflow_templates.AddWorkflowTemplatesArgs(parser,
                                                 dataproc.api_version)
Example #18
0
  def Run(self, args):
    dataproc = dp.Dataproc(self.ReleaseTrack())

    # Read cluster from YAML file.
    cluster = util.ReadYaml(args.file, dataproc.messages.Cluster)
    return clusters.CreateCluster(dataproc, cluster, args.async, args.timeout)
Example #19
0
    def Run(self, args):
        self.ValidateArgs(args)

        dataproc = dp.Dataproc(self.ReleaseTrack())

        cluster_ref = util.ParseCluster(args.name, dataproc)

        compute_resources = compute_helpers.GetComputeResources(
            self.ReleaseTrack(), args.name)

        beta = self.ReleaseTrack() == base.ReleaseTrack.BETA
        cluster_config = clusters.GetClusterConfig(args, dataproc,
                                                   cluster_ref.projectId,
                                                   compute_resources, beta)

        cluster = dataproc.messages.Cluster(
            config=cluster_config,
            clusterName=cluster_ref.clusterName,
            projectId=cluster_ref.projectId)

        self.ConfigureCluster(dataproc.messages, args, cluster)

        operation = dataproc.client.projects_regions_clusters.Create(
            dataproc.messages.DataprocProjectsRegionsClustersCreateRequest(
                projectId=cluster_ref.projectId,
                region=cluster_ref.region,
                cluster=cluster))

        if args.async:
            log.status.write('Creating [{0}] with operation [{1}].'.format(
                cluster_ref, operation.name))
            return

        operation = util.WaitForOperation(
            dataproc,
            operation,
            message='Waiting for cluster creation operation',
            timeout_s=args.timeout)

        get_request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
            projectId=cluster_ref.projectId,
            region=cluster_ref.region,
            clusterName=cluster_ref.clusterName)
        cluster = dataproc.client.projects_regions_clusters.Get(get_request)
        if cluster.status.state == (
                dataproc.messages.ClusterStatus.StateValueValuesEnum.RUNNING):

            zone_uri = cluster.config.gceClusterConfig.zoneUri
            zone_short_name = zone_uri.split('/')[-1]

            # Log the URL of the cluster
            log.CreatedResource(
                cluster_ref,
                # Also indicate which zone the cluster was placed in. This is helpful
                # if the server picked a zone (auto zone)
                details='Cluster placed in zone [{0}]'.format(zone_short_name))
        else:
            log.error('Create cluster failed!')
            if operation.details:
                log.error('Details:\n' + operation.details)
        return cluster
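Note: the zone short name logged above is simply the final path segment of the cluster's zone URI. A one-line sketch with a hypothetical URI:

zone_uri = 'https://www.googleapis.com/compute/v1/projects/my-project/zones/us-central1-a'
zone_short_name = zone_uri.split('/')[-1]  # 'us-central1-a'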
Example #20
0
  def Run(self, args):
    self.ValidateArgs(args)

    dataproc = dp.Dataproc()

    cluster_ref = dataproc.ParseCluster(args.name)

    compute_resources = compute_helpers.GetComputeResources(
        self.ReleaseTrack(), args.name)

    master_accelerator_type = None
    worker_accelerator_type = None
    master_accelerator_count = None
    worker_accelerator_count = None
    if self.ReleaseTrack() == base.ReleaseTrack.BETA:
      if args.master_accelerator:
        master_accelerator_type = args.master_accelerator['type']
        master_accelerator_count = args.master_accelerator.get('count', 1)
      if args.worker_accelerator:
        worker_accelerator_type = args.worker_accelerator['type']
        worker_accelerator_count = args.worker_accelerator.get('count', 1)

    # Resolve non-zonal GCE resources
    # We will let the server resolve short names of zonal resources because
    # if auto zone is requested, we will not know the zone before sending the
    # request
    image_ref = args.image and compute_resources.Parse(
        args.image,
        params={'project': cluster_ref.projectId},
        collection='compute.images')
    network_ref = args.network and compute_resources.Parse(
        args.network,
        params={'project': cluster_ref.projectId},
        collection='compute.networks')
    subnetwork_ref = args.subnet and compute_resources.Parse(
        args.subnet,
        params={
            'project': cluster_ref.projectId,
            'region': properties.VALUES.compute.region.GetOrFail,
        },
        collection='compute.subnetworks')
    timeout_str = str(args.initialization_action_timeout) + 's'
    init_actions = [
        dataproc.messages.NodeInitializationAction(
            executableFile=exe, executionTimeout=timeout_str)
        for exe in (args.initialization_actions or [])]
    # Increase the client timeout for each initialization action.
    args.timeout += args.initialization_action_timeout * len(init_actions)

    expanded_scopes = compute_helpers.ExpandScopeAliases(args.scopes)

    software_config = dataproc.messages.SoftwareConfig(
        imageVersion=args.image_version)

    master_boot_disk_size_gb = args.master_boot_disk_size_gb
    if args.master_boot_disk_size:
      master_boot_disk_size_gb = (
          api_utils.BytesToGb(args.master_boot_disk_size))

    worker_boot_disk_size_gb = args.worker_boot_disk_size_gb
    if args.worker_boot_disk_size:
      worker_boot_disk_size_gb = (
          api_utils.BytesToGb(args.worker_boot_disk_size))

    preemptible_worker_boot_disk_size_gb = (
        api_utils.BytesToGb(args.preemptible_worker_boot_disk_size))

    if args.single_node:
      args.properties[constants.ALLOW_ZERO_WORKERS_PROPERTY] = 'true'

    if args.properties:
      software_config.properties = encoding.DictToMessage(
          args.properties, dataproc.messages.SoftwareConfig.PropertiesValue)

    gce_cluster_config = dataproc.messages.GceClusterConfig(
        networkUri=network_ref and network_ref.SelfLink(),
        subnetworkUri=subnetwork_ref and subnetwork_ref.SelfLink(),
        internalIpOnly=args.no_address,
        serviceAccount=args.service_account,
        serviceAccountScopes=expanded_scopes,
        zoneUri=properties.VALUES.compute.zone.GetOrFail())

    if args.tags:
      gce_cluster_config.tags = args.tags

    if args.metadata:
      flat_metadata = dict((k, v) for d in args.metadata for k, v in d.items())
      gce_cluster_config.metadata = encoding.DictToMessage(
          flat_metadata, dataproc.messages.GceClusterConfig.MetadataValue)

    master_accelerators = []
    if master_accelerator_type:
      master_accelerators.append(
          dataproc.messages.AcceleratorConfig(
              acceleratorTypeUri=master_accelerator_type,
              acceleratorCount=master_accelerator_count))
    worker_accelerators = []
    if worker_accelerator_type:
      worker_accelerators.append(
          dataproc.messages.AcceleratorConfig(
              acceleratorTypeUri=worker_accelerator_type,
              acceleratorCount=worker_accelerator_count))

    cluster_config = dataproc.messages.ClusterConfig(
        configBucket=args.bucket,
        gceClusterConfig=gce_cluster_config,
        masterConfig=dataproc.messages.InstanceGroupConfig(
            numInstances=args.num_masters,
            imageUri=image_ref and image_ref.SelfLink(),
            machineTypeUri=args.master_machine_type,
            accelerators=master_accelerators,
            diskConfig=dataproc.messages.DiskConfig(
                bootDiskSizeGb=master_boot_disk_size_gb,
                numLocalSsds=args.num_master_local_ssds,),),
        workerConfig=dataproc.messages.InstanceGroupConfig(
            numInstances=args.num_workers,
            imageUri=image_ref and image_ref.SelfLink(),
            machineTypeUri=args.worker_machine_type,
            accelerators=worker_accelerators,
            diskConfig=dataproc.messages.DiskConfig(
                bootDiskSizeGb=worker_boot_disk_size_gb,
                numLocalSsds=args.num_worker_local_ssds,),),
        initializationActions=init_actions,
        softwareConfig=software_config,)

    # Secondary worker group is optional. However, users may specify the boot
    # disk size for future preemptible VMs at creation time.
    if (args.num_preemptible_workers is not None or
        preemptible_worker_boot_disk_size_gb is not None):
      cluster_config.secondaryWorkerConfig = (
          dataproc.messages.InstanceGroupConfig(
              numInstances=args.num_preemptible_workers,
              diskConfig=dataproc.messages.DiskConfig(
                  bootDiskSizeGb=preemptible_worker_boot_disk_size_gb,
              )))

    cluster = dataproc.messages.Cluster(
        config=cluster_config,
        clusterName=cluster_ref.clusterName,
        projectId=cluster_ref.projectId)

    self.ConfigureCluster(dataproc.messages, args, cluster)

    operation = dataproc.client.projects_regions_clusters.Create(
        dataproc.messages.DataprocProjectsRegionsClustersCreateRequest(
            projectId=cluster_ref.projectId,
            region=cluster_ref.region,
            cluster=cluster))

    if args.async:
      log.status.write(
          'Creating [{0}] with operation [{1}].'.format(
              cluster_ref, operation.name))
      return

    operation = dataproc.WaitForOperation(
        operation,
        message='Waiting for cluster creation operation',
        timeout_s=args.timeout)

    get_request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
        projectId=cluster_ref.projectId,
        region=cluster_ref.region,
        clusterName=cluster_ref.clusterName)
    cluster = dataproc.client.projects_regions_clusters.Get(get_request)
    if cluster.status.state == (
        dataproc.messages.ClusterStatus.StateValueValuesEnum.RUNNING):

      zone_uri = cluster.config.gceClusterConfig.zoneUri
      zone_short_name = zone_uri.split('/')[-1]

      # Log the URL of the cluster
      log.CreatedResource(
          cluster_ref,
          # Also indicate which zone the cluster was placed in. This is helpful
          # if the server picked a zone (auto zone)
          details='Cluster placed in zone [{0}]'.format(zone_short_name))
    else:
      log.error('Create cluster failed!')
      if operation.details:
        log.error('Details:\n' + operation.details)
    return cluster
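Note: each initialization action above is given the same executionTimeout, and the client-side wait is increased by that amount per action. A small sketch of the arithmetic with hypothetical numbers:

initialization_action_timeout = 600  # 10 minutes per action, in seconds
initialization_actions = ['gs://my-bucket/a.sh', 'gs://my-bucket/b.sh']  # hypothetical
timeout = 1800                       # base client timeout in seconds
timeout += initialization_action_timeout * len(initialization_actions)
# timeout == 3000, i.e. 30m base plus 2 x 10m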
Example #21
0
 def Args(cls, parser):
     dataproc = dp.Dataproc(cls.ReleaseTrack())
     flags.AddClusterResourceArg(parser, 'export', dataproc.api_version)
     export_util.AddExportFlags(parser, cls.GetSchemaPath(for_help=True))
Example #22
0
 def Args(cls, parser):
     dataproc = dp.Dataproc(cls.ReleaseTrack())
     flags.AddJobResourceArg(parser, 'retrieve the policy for',
                             dataproc.api_version)
     base.URI_FLAG.RemoveFromParser(parser)
Example #23
0
 def Args(cls, parser):
     dataproc = dp.Dataproc(cls.ReleaseTrack())
     flags.AddTemplateResourceArg(parser, 'set the policy on',
                                  dataproc.api_version)
     iam_util.AddArgForPolicyFile(parser)
Example #24
0
 def Args(cls, parser):
     dataproc = dp.Dataproc(cls.ReleaseTrack())
     flags.AddTemplateResourceArg(parser, 'describe', dataproc.api_version)
     flags.AddVersionFlag(parser)
Example #25
0
 def Run(self, args):
     return _Run(dp.Dataproc(self.ReleaseTrack()), args)
Example #26
0
 def Args(cls, parser):
     flags.AddTimeoutFlag(parser)
     dataproc = dp.Dataproc(cls.ReleaseTrack())
     flags.AddClusterResourceArg(parser, 'diagnose', dataproc.api_version)
Example #27
0
 def Args(cls, parser):
     base.ASYNC_FLAG.AddToParser(parser)
     flags.AddTimeoutFlag(parser)
     dataproc = dp.Dataproc(cls.ReleaseTrack())
     flags.AddClusterResourceArg(parser, 'stop', dataproc.api_version)
Example #28
0
    def Args(cls, parser):
        dataproc = dp.Dataproc(cls.ReleaseTrack())
        base.ASYNC_FLAG.AddToParser(parser)
        # Allow the user to specify new labels as well as update/remove existing
        labels_util.AddUpdateLabelsFlags(parser)
        # Updates can take hours if a lot of data needs to be moved on HDFS
        flags.AddTimeoutFlag(parser, default='3h')
        flags.AddClusterResourceArg(parser, 'update', dataproc.api_version)
        parser.add_argument(
            '--num-workers',
            type=int,
            help='The new number of worker nodes in the cluster.')
        parser.add_argument(
            '--num-preemptible-workers',
            type=int,
            help='The new number of preemptible worker nodes in the cluster.')

        parser.add_argument('--graceful-decommission-timeout',
                            type=arg_parsers.Duration(lower_bound='0s',
                                                      upper_bound='1d'),
                            help="""
              The graceful decommission timeout for decommissioning Node Managers
              in the cluster, used when removing nodes. Graceful decommissioning
              allows removing nodes from the cluster without interrupting jobs in
              progress. Timeout specifies how long to wait for jobs in progress to
              finish before forcefully removing nodes (and potentially
              interrupting jobs). Timeout defaults to 0 if not set (for forceful
              decommission), and the maximum allowed timeout is 1 day.
              See $ gcloud topic datetimes for information on duration formats.
              """)

        idle_delete_group = parser.add_mutually_exclusive_group()
        idle_delete_group.add_argument('--max-idle',
                                       type=arg_parsers.Duration(),
                                       help="""\
        The duration before cluster is auto-deleted after last job finished,
        such as "2h" or "1d".
        See $ gcloud topic datetimes for information on duration formats.
        """)
        idle_delete_group.add_argument('--no-max-idle',
                                       action='store_true',
                                       help="""\
        Cancels the cluster auto-deletion by cluster idle duration (configured
         by --max-idle flag)
        """)

        auto_delete_group = parser.add_mutually_exclusive_group()
        auto_delete_group.add_argument('--max-age',
                                       type=arg_parsers.Duration(),
                                       help="""\
        The lifespan of the cluster before it is auto-deleted, such as
        "2h" or "1d".
        See $ gcloud topic datetimes for information on duration formats.
        """)
        auto_delete_group.add_argument('--expiration-time',
                                       type=arg_parsers.Datetime.Parse,
                                       help="""\
        The time when cluster will be auto-deleted, such as
        "2017-08-29T18:52:51.142Z". See $ gcloud topic datetimes for
        information on time formats.
        """)
        auto_delete_group.add_argument('--no-max-age',
                                       action='store_true',
                                       help="""\
        Cancels the cluster auto-deletion by maximum cluster age (configured by
         --max-age or --expiration-time flags)
        """)

        # Can only specify one of --autoscaling-policy or --disable-autoscaling
        autoscaling_group = parser.add_mutually_exclusive_group()
        flags.AddAutoscalingPolicyResourceArgForCluster(autoscaling_group,
                                                        api_version='v1')
        autoscaling_group.add_argument('--disable-autoscaling',
                                       action='store_true',
                                       help="""\
        Disable autoscaling, if it is enabled. This is an alias for passing the
        empty string to --autoscaling-policy.
        """)
Example #29
0
 def Args(cls, parser):
   dataproc = dp.Dataproc(cls.ReleaseTrack())
   flags.AddTemplateResourceArg(parser, 'export', dataproc.api_version)
   export_util.AddExportFlags(parser)
   flags.AddVersionFlag(parser)
Example #30
0
 def Args(cls, parser):
     dataproc = dp.Dataproc(cls.ReleaseTrack())
     labels_util.AddCreateLabelsFlags(parser)
     workflow_templates.AddDagTimeoutFlag(parser, False)
     flags.AddTemplateResourceArg(parser, 'create', dataproc.api_version)