def Args(cls, parser):
  dataproc = dp.Dataproc(cls.ReleaseTrack())
  flags.AddAutoscalingPolicyResourceArg(parser, 'export', dataproc.api_version)
  export_util.AddExportFlags(parser)
def Run(self, args):
  dataproc = dp.Dataproc(self.ReleaseTrack())

  cluster_ref = util.ParseCluster(args.name, dataproc)

  cluster_config = dataproc.messages.ClusterConfig()
  changed_fields = []

  has_changes = False

  if args.num_workers is not None:
    worker_config = dataproc.messages.InstanceGroupConfig(
        numInstances=args.num_workers)
    cluster_config.workerConfig = worker_config
    changed_fields.append('config.worker_config.num_instances')
    has_changes = True

  if args.num_preemptible_workers is not None:
    worker_config = dataproc.messages.InstanceGroupConfig(
        numInstances=args.num_preemptible_workers)
    cluster_config.secondaryWorkerConfig = worker_config
    changed_fields.append('config.secondary_worker_config.num_instances')
    has_changes = True

  # Update labels if the user requested it.
  labels = None
  if args.update_labels or args.remove_labels:
    has_changes = True
    changed_fields.append('labels')

    # We need to fetch the cluster first so we know what the labels look like.
    # labels_util.UpdateLabels will fill out the proto for us with all the
    # updates and removals, but first we need to provide the current state
    # of the labels.
    get_cluster_request = (
        dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
            projectId=cluster_ref.projectId,
            region=cluster_ref.region,
            clusterName=cluster_ref.clusterName))
    current_cluster = dataproc.client.projects_regions_clusters.Get(
        get_cluster_request)
    labels = labels_util.UpdateLabels(
        current_cluster.labels,
        dataproc.messages.Cluster.LabelsValue,
        args.update_labels,
        args.remove_labels)

  if not has_changes:
    raise exceptions.ArgumentError(
        'Must specify at least one cluster parameter to update.')

  cluster = dataproc.messages.Cluster(
      config=cluster_config,
      clusterName=cluster_ref.clusterName,
      labels=labels,
      projectId=cluster_ref.projectId)

  request = dataproc.messages.DataprocProjectsRegionsClustersPatchRequest(
      clusterName=cluster_ref.clusterName,
      region=cluster_ref.region,
      projectId=cluster_ref.projectId,
      cluster=cluster,
      updateMask=','.join(changed_fields))

  if (self.ReleaseTrack() == base.ReleaseTrack.BETA and
      args.graceful_decommission_timeout):
    request.gracefulDecommissionTimeout = (
        str(args.graceful_decommission_timeout) + 's')

  operation = dataproc.client.projects_regions_clusters.Patch(request)

  if args.async:
    log.status.write(
        'Updating [{0}] with operation [{1}].'.format(
            cluster_ref, operation.name))
    return

  util.WaitForOperation(
      dataproc,
      operation,
      message='Waiting for cluster update operation',
      timeout_s=args.timeout)

  request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
      projectId=cluster_ref.projectId,
      region=cluster_ref.region,
      clusterName=cluster_ref.clusterName)
  cluster = dataproc.client.projects_regions_clusters.Get(request)
  log.UpdatedResource(cluster_ref)
  return cluster
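# The Patch call above relies on labels_util.UpdateLabels to merge the
# requested label changes into the cluster's current labels. Below is a
# minimal sketch of that merge using plain dicts instead of the
# Cluster.LabelsValue proto; merge_labels is a hypothetical helper shown only
# to illustrate the semantics, not the real labels_util API.
def merge_labels(current, update_labels=None, remove_labels=None):
  """Returns the current labels with updates applied and removals dropped."""
  merged = dict(current)
  merged.update(update_labels or {})
  for key in remove_labels or []:
    merged.pop(key, None)
  return merged


# Example (illustrative values):
# merge_labels({'env': 'dev', 'team': 'data'},
#              update_labels={'env': 'prod'},
#              remove_labels=['team'])  # -> {'env': 'prod'}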
def Args(cls, parser):
  dataproc = dp.Dataproc(cls.ReleaseTrack())
  flags.AddJobResourceArg(parser, 'kill', dataproc.api_version)
def Args(cls, parser):
  dataproc = dp.Dataproc(cls.ReleaseTrack())
  flags.AddOperationResourceArg(parser, 'cancel', dataproc.api_version)
def Run(self, args):
  dataproc = dp.Dataproc(self.ReleaseTrack())

  cluster_ref = util.ParseCluster(args.name, dataproc)

  cluster_config = dataproc.messages.ClusterConfig()
  changed_fields = []

  has_changes = False

  if args.num_workers is not None:
    worker_config = dataproc.messages.InstanceGroupConfig(
        numInstances=args.num_workers)
    cluster_config.workerConfig = worker_config
    changed_fields.append('config.worker_config.num_instances')
    has_changes = True

  if args.num_preemptible_workers is not None:
    worker_config = dataproc.messages.InstanceGroupConfig(
        numInstances=args.num_preemptible_workers)
    cluster_config.secondaryWorkerConfig = worker_config
    changed_fields.append('config.secondary_worker_config.num_instances')
    has_changes = True

  if self.ReleaseTrack() == base.ReleaseTrack.BETA:
    lifecycle_config = dataproc.messages.LifecycleConfig()
    changed_config = False
    if args.max_age is not None:
      lifecycle_config.autoDeleteTtl = str(args.max_age) + 's'
      changed_fields.append('config.lifecycle_config.auto_delete_ttl')
      changed_config = True
    if args.expiration_time is not None:
      lifecycle_config.autoDeleteTime = times.FormatDateTime(
          args.expiration_time)
      changed_fields.append('config.lifecycle_config.auto_delete_time')
      changed_config = True
    if args.max_idle is not None:
      lifecycle_config.idleDeleteTtl = str(args.max_idle) + 's'
      changed_fields.append('config.lifecycle_config.idle_delete_ttl')
      changed_config = True
    if args.no_max_age:
      lifecycle_config.autoDeleteTtl = None
      changed_fields.append('config.lifecycle_config.auto_delete_ttl')
      changed_config = True
    if args.no_max_idle:
      lifecycle_config.idleDeleteTtl = None
      changed_fields.append('config.lifecycle_config.idle_delete_ttl')
      changed_config = True
    if changed_config:
      cluster_config.lifecycleConfig = lifecycle_config
      has_changes = True

  # Put the Get call in a thunk so we only make it if needed.
  def _GetCurrentLabels():
    # We need to fetch the cluster first so we know what the labels look like.
    # labels_util will fill out the proto for us with all the updates and
    # removals, but first we need to provide the current state of the labels.
    get_cluster_request = (
        dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
            projectId=cluster_ref.projectId,
            region=cluster_ref.region,
            clusterName=cluster_ref.clusterName))
    current_cluster = dataproc.client.projects_regions_clusters.Get(
        get_cluster_request)
    return current_cluster.labels

  labels_update = labels_util.ProcessUpdateArgsLazy(
      args,
      dataproc.messages.Cluster.LabelsValue,
      orig_labels_thunk=_GetCurrentLabels)
  if labels_update.needs_update:
    has_changes = True
    changed_fields.append('labels')
  labels = labels_update.GetOrNone()

  if not has_changes:
    raise exceptions.ArgumentError(
        'Must specify at least one cluster parameter to update.')

  cluster = dataproc.messages.Cluster(
      config=cluster_config,
      clusterName=cluster_ref.clusterName,
      labels=labels,
      projectId=cluster_ref.projectId)

  request = dataproc.messages.DataprocProjectsRegionsClustersPatchRequest(
      clusterName=cluster_ref.clusterName,
      region=cluster_ref.region,
      projectId=cluster_ref.projectId,
      cluster=cluster,
      updateMask=','.join(changed_fields),
      requestId=util.GetUniqueId())

  if args.graceful_decommission_timeout is not None:
    request.gracefulDecommissionTimeout = (
        str(args.graceful_decommission_timeout) + 's')

  operation = dataproc.client.projects_regions_clusters.Patch(request)

  if args.async:
    log.status.write(
        'Updating [{0}] with operation [{1}].'.format(
            cluster_ref, operation.name))
    return

  util.WaitForOperation(
      dataproc,
      operation,
      message='Waiting for cluster update operation',
      timeout_s=args.timeout)

  request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
      projectId=cluster_ref.projectId,
      region=cluster_ref.region,
      clusterName=cluster_ref.clusterName)
  cluster = dataproc.client.projects_regions_clusters.Get(request)
  log.UpdatedResource(cluster_ref)
  return cluster
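# ProcessUpdateArgsLazy above receives a thunk rather than the current labels,
# so the extra clusters.Get round trip only happens when a label flag was
# actually specified. A minimal sketch of that lazy pattern with plain dicts;
# lazy_label_diff is a hypothetical stand-in, not the real labels_util
# implementation.
def lazy_label_diff(update_labels, remove_labels, orig_labels_thunk):
  """Only invokes orig_labels_thunk when there is something to change."""
  if not update_labels and not remove_labels:
    return None  # No label update requested; skip the expensive fetch.
  current = orig_labels_thunk()  # e.g. a clusters.Get API call.
  merged = dict(current)
  merged.update(update_labels or {})
  for key in remove_labels or []:
    merged.pop(key, None)
  return merged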
def Args(parser):
  dataproc = dp.Dataproc(base.ReleaseTrack.GA)
  flags.AddBatchResourceArg(parser, 'wait', dataproc.api_version)
def Args(cls, parser):
  dataproc = dp.Dataproc(cls.ReleaseTrack())
  labels_util.AddCreateLabelsFlags(parser)
  flags.AddTemplateResourceArg(parser, 'create', dataproc.api_version)
def Args(cls, parser):
  dataproc = dp.Dataproc(cls.ReleaseTrack())
  flags.AddAutoscalingPolicyResourceArg(
      parser, 'retrieve the IAM policy for', api_version=dataproc.api_version)
def Args(parser):
  dataproc = dp.Dataproc()
  flags.AddBatchResourceArg(parser, 'describe', dataproc.api_version)
def GetApiVersion(cls):
  """Returns the API version based on the release track."""
  return dp.Dataproc(cls.ReleaseTrack()).api_version
def Args(cls, parser):
  dataproc = dp.Dataproc(cls.ReleaseTrack())
  flags.AddImportArgs(
      parser, 'import', dataproc.api_version, 'AutoscalingPolicy')
def Run(self, args):
  message = (
      'A personal authentication session will propagate your personal '
      'credentials to the cluster, so make sure you trust the cluster '
      'and the user who created it.')
  console_io.PromptContinue(
      message=message,
      cancel_on_no=True,
      cancel_string='Enabling session aborted by user')

  dataproc = dp.Dataproc(self.ReleaseTrack())
  cluster_ref = args.CONCEPTS.cluster.Parse()
  project = cluster_ref.projectId
  region = cluster_ref.region
  cluster_name = cluster_ref.clusterName
  get_request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
      projectId=project, region=region, clusterName=cluster_name)
  cluster = dataproc.client.projects_regions_clusters.Get(get_request)
  cluster_uuid = cluster.clusterUuid

  if args.access_boundary:
    with files.FileReader(args.access_boundary) as abf:
      access_boundary_json = abf.read()
  else:
    access_boundary_json = flags.ProjectGcsObjectsAccessBoundary(project)

  # ECIES keys should be used by default. If the tink libraries are absent
  # from the system, then fall back to using RSA keys.
  cluster_key_type = 'ECIES' if self.is_tink_library_installed() else 'RSA'

  cluster_key = None
  if cluster_key_type == 'ECIES':
    # Try to fetch ECIES keys from the cluster control plane node's metadata.
    # If ECIES keys are not available, then again fall back to RSA keys.
    cluster_key = clusters.ClusterKey(cluster, cluster_key_type)
    if not cluster_key:
      cluster_key_type = 'RSA'

  openssl_executable = None
  if cluster_key_type == 'RSA':
    cluster_key = clusters.ClusterKey(cluster, cluster_key_type)
    openssl_executable = args.openssl_command
    if not openssl_executable:
      try:
        openssl_executable = files.FindExecutableOnPath('openssl')
      except ValueError:
        log.fatal('Could not find openssl on your system. The enable-session '
                  'command requires openssl to be installed.')

  operation_poller = waiter.CloudOperationPollerNoResources(
      dataproc.client.projects_regions_operations,
      lambda operation: operation.name)
  try:
    if not cluster_key:
      raise exceptions.PersonalAuthError(
          'The cluster {} does not support personal auth.'.format(
              cluster_name))

    with progress_tracker.ProgressTracker(
        'Injecting initial credentials into the cluster {}'.format(
            cluster_name),
        autotick=True):
      self.inject_credentials(dataproc, project, region, cluster_name,
                              cluster_uuid, cluster_key, access_boundary_json,
                              operation_poller, openssl_executable)

    if not args.refresh_credentials:
      return

    update_message = (
        'Periodically refreshing credentials for cluster {}. This will '
        'continue running until the command is interrupted'
    ).format(cluster_name)

    with progress_tracker.ProgressTracker(update_message, autotick=True):
      try:
        # Cluster keys are periodically regenerated, so fetch the latest
        # each time we inject credentials.
        cluster = dataproc.client.projects_regions_clusters.Get(get_request)
        cluster_key = clusters.ClusterKey(cluster, cluster_key_type)
        if not cluster_key:
          raise exceptions.PersonalAuthError(
              'The cluster {} does not support personal auth.'.format(
                  cluster_name))

        failure_count = 0
        while failure_count < 3:
          try:
            time.sleep(30)
            self.inject_credentials(dataproc, project, region, cluster_name,
                                    cluster_uuid, cluster_key,
                                    access_boundary_json, operation_poller,
                                    openssl_executable)
            failure_count = 0
          except ValueError as err:
            log.error(err)
            failure_count += 1
        raise exceptions.PersonalAuthError(
            'Credential injection failed three times in a row, giving up...')
      except (console_io.OperationCancelledError, KeyboardInterrupt):
        return
  except exceptions.PersonalAuthError as err:
    log.error(err)
    return
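# The key-type selection above prefers ECIES and falls back to RSA twice:
# once when the tink libraries are missing locally, and again when the
# cluster does not expose an ECIES key. A compact, hypothetical sketch of
# that decision; choose_cluster_key and get_key stand in for the calls used
# above and are not part of the real module.
def choose_cluster_key(tink_installed, get_key):
  """Returns (key_type, key), preferring ECIES and falling back to RSA."""
  key_type = 'ECIES' if tink_installed else 'RSA'
  key = None
  if key_type == 'ECIES':
    key = get_key('ECIES')
    if not key:
      key_type = 'RSA'  # Cluster has no ECIES key; retry with RSA.
  if key_type == 'RSA':
    key = get_key('RSA')
  return key_type, key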
def Args(cls, parser):
  dataproc = dp.Dataproc(cls.ReleaseTrack())
  flags.AddTimeoutFlag(parser, default='35m')
  base.ASYNC_FLAG.AddToParser(parser)
  flags.AddParametersFlag(parser)
  flags.AddTemplateResourceArg(parser, 'run', dataproc.api_version)
def SetUp(self):
  self.dataproc_mock = dp.Dataproc(self.track)
  self.dataproc_mock._client = self.mock_client
  self.dataproc_mock._messages = self.messages
def Run(self, args):
  dataproc = dp.Dataproc(self.ReleaseTrack())

  cluster_ref = args.CONCEPTS.cluster.Parse()
  project = cluster_ref.projectId
  region = cluster_ref.region
  cluster_name = cluster_ref.clusterName
  get_request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
      projectId=project, region=region, clusterName=cluster_name)
  cluster = dataproc.client.projects_regions_clusters.Get(get_request)
  cluster_uuid = cluster.clusterUuid

  if args.access_boundary:
    with files.FileReader(args.access_boundary, mode='r') as abf:
      access_boundary_json = abf.read()
  else:
    access_boundary_json = flags.ProjectGcsObjectsAccessBoundary(project)

  openssl_executable = args.openssl_command
  if not openssl_executable:
    try:
      openssl_executable = files.FindExecutableOnPath('openssl')
    except ValueError:
      log.fatal('Could not find openssl on your system. The enable-session '
                'command requires openssl to be installed.')

  operation_poller = waiter.CloudOperationPollerNoResources(
      dataproc.client.projects_regions_operations,
      lambda operation: operation.name)
  try:
    cluster_key = clusters.ClusterKey(cluster)
    if not cluster_key:
      raise exceptions.PersonalAuthError(
          'The cluster {} does not support personal auth.'.format(
              cluster_name))

    with progress_tracker.ProgressTracker(
        'Injecting initial credentials into the cluster {}'.format(
            cluster_name),
        autotick=True):
      self.inject_credentials(dataproc, project, region, cluster_name,
                              cluster_uuid, cluster_key, access_boundary_json,
                              openssl_executable, operation_poller)

    if not args.refresh_credentials:
      return

    update_message = (
        'Periodically refreshing credentials for cluster {}. This will '
        'continue running until the command is interrupted'
    ).format(cluster_name)

    with progress_tracker.ProgressTracker(update_message, autotick=True):
      try:
        # Cluster keys are periodically regenerated, so fetch the latest
        # each time we inject credentials.
        cluster = dataproc.client.projects_regions_clusters.Get(get_request)
        cluster_key = clusters.ClusterKey(cluster)
        if not cluster_key:
          raise exceptions.PersonalAuthError(
              'The cluster {} does not support personal auth.'.format(
                  cluster_name))

        failure_count = 0
        while failure_count < 3:
          try:
            time.sleep(30)
            self.inject_credentials(dataproc, project, region, cluster_name,
                                    cluster_uuid, cluster_key,
                                    access_boundary_json, openssl_executable,
                                    operation_poller)
            failure_count = 0
          except ValueError as err:
            log.error(err)
            failure_count += 1
        raise exceptions.PersonalAuthError(
            'Credential injection failed three times in a row, giving up...')
      except (console_io.OperationCancelledError, KeyboardInterrupt):
        return
  except exceptions.PersonalAuthError as err:
    log.error(err)
    return
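# The refresh loop above tolerates transient injection failures: the failure
# counter resets after every success and the command only gives up after
# three consecutive failures. A standalone sketch of that retry shape; the
# inject callable and RuntimeError are hypothetical stand-ins, while the
# 30-second interval and threshold mirror the code above.
import time


def refresh_until_interrupted(inject, interval_s=30, max_failures=3):
  """Repeatedly calls inject(), giving up after N consecutive failures."""
  failure_count = 0
  while failure_count < max_failures:
    try:
      time.sleep(interval_s)
      inject()
      failure_count = 0  # A success clears the failure streak.
    except ValueError:
      failure_count += 1
  raise RuntimeError('Credential injection failed three times in a row.')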
def Run(self, args):
  dataproc = dp.Dataproc(base.ReleaseTrack.GA)
  sparkr_batch = sparkr_batch_factory.SparkRBatchFactory(
      dataproc).UploadLocalFilesAndGetMessage(args)
  return batch_submitter.Submit(sparkr_batch, dataproc, args)
def Args(cls, parser):
  pig.PigBase.Args(parser)
  dataproc = dp.Dataproc(cls.ReleaseTrack())
  workflow_templates.AddWorkflowTemplatesArgs(parser, dataproc.api_version)
def Run(self, args):
  dataproc = dp.Dataproc(self.ReleaseTrack())
  # Read the cluster definition from a YAML file.
  cluster = util.ReadYaml(args.file, dataproc.messages.Cluster)
  return clusters.CreateCluster(dataproc, cluster, args.async, args.timeout)
def Run(self, args):
  self.ValidateArgs(args)

  dataproc = dp.Dataproc(self.ReleaseTrack())

  cluster_ref = util.ParseCluster(args.name, dataproc)

  compute_resources = compute_helpers.GetComputeResources(
      self.ReleaseTrack(), args.name)
  beta = self.ReleaseTrack() == base.ReleaseTrack.BETA

  cluster_config = clusters.GetClusterConfig(
      args, dataproc, cluster_ref.projectId, compute_resources, beta)

  cluster = dataproc.messages.Cluster(
      config=cluster_config,
      clusterName=cluster_ref.clusterName,
      projectId=cluster_ref.projectId)

  self.ConfigureCluster(dataproc.messages, args, cluster)

  operation = dataproc.client.projects_regions_clusters.Create(
      dataproc.messages.DataprocProjectsRegionsClustersCreateRequest(
          projectId=cluster_ref.projectId,
          region=cluster_ref.region,
          cluster=cluster))

  if args.async:
    log.status.write('Creating [{0}] with operation [{1}].'.format(
        cluster_ref, operation.name))
    return

  operation = util.WaitForOperation(
      dataproc,
      operation,
      message='Waiting for cluster creation operation',
      timeout_s=args.timeout)

  get_request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
      projectId=cluster_ref.projectId,
      region=cluster_ref.region,
      clusterName=cluster_ref.clusterName)
  cluster = dataproc.client.projects_regions_clusters.Get(get_request)
  if cluster.status.state == (
      dataproc.messages.ClusterStatus.StateValueValuesEnum.RUNNING):
    zone_uri = cluster.config.gceClusterConfig.zoneUri
    zone_short_name = zone_uri.split('/')[-1]

    # Log the URL of the cluster.
    log.CreatedResource(
        cluster_ref,
        # Also indicate which zone the cluster was placed in. This is helpful
        # if the server picked a zone (auto zone).
        details='Cluster placed in zone [{0}]'.format(zone_short_name))
  else:
    log.error('Create cluster failed!')
    if operation.details:
      log.error('Details:\n' + operation.details)
  return cluster
def Run(self, args):
  self.ValidateArgs(args)

  dataproc = dp.Dataproc()

  cluster_ref = dataproc.ParseCluster(args.name)

  compute_resources = compute_helpers.GetComputeResources(
      self.ReleaseTrack(), args.name)

  master_accelerator_type = None
  worker_accelerator_type = None
  master_accelerator_count = None
  worker_accelerator_count = None
  if self.ReleaseTrack() == base.ReleaseTrack.BETA:
    if args.master_accelerator:
      master_accelerator_type = args.master_accelerator['type']
      master_accelerator_count = args.master_accelerator.get('count', 1)
    if args.worker_accelerator:
      worker_accelerator_type = args.worker_accelerator['type']
      worker_accelerator_count = args.worker_accelerator.get('count', 1)

  # Resolve non-zonal GCE resources. We will let the server resolve short
  # names of zonal resources because if auto zone is requested, we will not
  # know the zone before sending the request.
  image_ref = args.image and compute_resources.Parse(
      args.image,
      params={'project': cluster_ref.projectId},
      collection='compute.images')
  network_ref = args.network and compute_resources.Parse(
      args.network,
      params={'project': cluster_ref.projectId},
      collection='compute.networks')
  subnetwork_ref = args.subnet and compute_resources.Parse(
      args.subnet,
      params={
          'project': cluster_ref.projectId,
          'region': properties.VALUES.compute.region.GetOrFail,
      },
      collection='compute.subnetworks')

  timeout_str = str(args.initialization_action_timeout) + 's'
  init_actions = [
      dataproc.messages.NodeInitializationAction(
          executableFile=exe, executionTimeout=timeout_str)
      for exe in (args.initialization_actions or [])]
  # Increase the client timeout for each initialization action.
  args.timeout += args.initialization_action_timeout * len(init_actions)

  expanded_scopes = compute_helpers.ExpandScopeAliases(args.scopes)

  software_config = dataproc.messages.SoftwareConfig(
      imageVersion=args.image_version)

  master_boot_disk_size_gb = args.master_boot_disk_size_gb
  if args.master_boot_disk_size:
    master_boot_disk_size_gb = (
        api_utils.BytesToGb(args.master_boot_disk_size))

  worker_boot_disk_size_gb = args.worker_boot_disk_size_gb
  if args.worker_boot_disk_size:
    worker_boot_disk_size_gb = (
        api_utils.BytesToGb(args.worker_boot_disk_size))

  preemptible_worker_boot_disk_size_gb = (
      api_utils.BytesToGb(args.preemptible_worker_boot_disk_size))

  if args.single_node:
    args.properties[constants.ALLOW_ZERO_WORKERS_PROPERTY] = 'true'

  if args.properties:
    software_config.properties = encoding.DictToMessage(
        args.properties, dataproc.messages.SoftwareConfig.PropertiesValue)

  gce_cluster_config = dataproc.messages.GceClusterConfig(
      networkUri=network_ref and network_ref.SelfLink(),
      subnetworkUri=subnetwork_ref and subnetwork_ref.SelfLink(),
      internalIpOnly=args.no_address,
      serviceAccount=args.service_account,
      serviceAccountScopes=expanded_scopes,
      zoneUri=properties.VALUES.compute.zone.GetOrFail())

  if args.tags:
    gce_cluster_config.tags = args.tags

  if args.metadata:
    flat_metadata = dict(
        (k, v) for d in args.metadata for k, v in d.items())
    gce_cluster_config.metadata = encoding.DictToMessage(
        flat_metadata, dataproc.messages.GceClusterConfig.MetadataValue)

  master_accelerators = []
  if master_accelerator_type:
    master_accelerators.append(
        dataproc.messages.AcceleratorConfig(
            acceleratorTypeUri=master_accelerator_type,
            acceleratorCount=master_accelerator_count))
  worker_accelerators = []
  if worker_accelerator_type:
    worker_accelerators.append(
        dataproc.messages.AcceleratorConfig(
            acceleratorTypeUri=worker_accelerator_type,
            acceleratorCount=worker_accelerator_count))

  cluster_config = dataproc.messages.ClusterConfig(
      configBucket=args.bucket,
      gceClusterConfig=gce_cluster_config,
      masterConfig=dataproc.messages.InstanceGroupConfig(
          numInstances=args.num_masters,
          imageUri=image_ref and image_ref.SelfLink(),
          machineTypeUri=args.master_machine_type,
          accelerators=master_accelerators,
          diskConfig=dataproc.messages.DiskConfig(
              bootDiskSizeGb=master_boot_disk_size_gb,
              numLocalSsds=args.num_master_local_ssds)),
      workerConfig=dataproc.messages.InstanceGroupConfig(
          numInstances=args.num_workers,
          imageUri=image_ref and image_ref.SelfLink(),
          machineTypeUri=args.worker_machine_type,
          accelerators=worker_accelerators,
          diskConfig=dataproc.messages.DiskConfig(
              bootDiskSizeGb=worker_boot_disk_size_gb,
              numLocalSsds=args.num_worker_local_ssds)),
      initializationActions=init_actions,
      softwareConfig=software_config)

  # The secondary worker group is optional. However, users may specify
  # future pVM disk size at creation time.
  if (args.num_preemptible_workers is not None or
      preemptible_worker_boot_disk_size_gb is not None):
    cluster_config.secondaryWorkerConfig = (
        dataproc.messages.InstanceGroupConfig(
            numInstances=args.num_preemptible_workers,
            diskConfig=dataproc.messages.DiskConfig(
                bootDiskSizeGb=preemptible_worker_boot_disk_size_gb)))

  cluster = dataproc.messages.Cluster(
      config=cluster_config,
      clusterName=cluster_ref.clusterName,
      projectId=cluster_ref.projectId)

  self.ConfigureCluster(dataproc.messages, args, cluster)

  operation = dataproc.client.projects_regions_clusters.Create(
      dataproc.messages.DataprocProjectsRegionsClustersCreateRequest(
          projectId=cluster_ref.projectId,
          region=cluster_ref.region,
          cluster=cluster))

  if args.async:
    log.status.write(
        'Creating [{0}] with operation [{1}].'.format(
            cluster_ref, operation.name))
    return

  operation = dataproc.WaitForOperation(
      operation,
      message='Waiting for cluster creation operation',
      timeout_s=args.timeout)

  get_request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
      projectId=cluster_ref.projectId,
      region=cluster_ref.region,
      clusterName=cluster_ref.clusterName)
  cluster = dataproc.client.projects_regions_clusters.Get(get_request)
  if cluster.status.state == (
      dataproc.messages.ClusterStatus.StateValueValuesEnum.RUNNING):
    zone_uri = cluster.config.gceClusterConfig.zoneUri
    zone_short_name = zone_uri.split('/')[-1]

    # Log the URL of the cluster.
    log.CreatedResource(
        cluster_ref,
        # Also indicate which zone the cluster was placed in. This is helpful
        # if the server picked a zone (auto zone).
        details='Cluster placed in zone [{0}]'.format(zone_short_name))
  else:
    log.error('Create cluster failed!')
    if operation.details:
      log.error('Details:\n' + operation.details)
  return cluster
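# --metadata can be repeated, so args.metadata arrives as a list of dicts.
# The create flow above flattens them into a single dict before converting it
# to the GceClusterConfig.MetadataValue proto via encoding.DictToMessage. A
# small sketch of that flattening with plain dicts; the sample keys and the
# flatten_metadata name are illustrative only.
def flatten_metadata(metadata_dicts):
  """Merges a list of {key: value} dicts; later entries win on conflicts."""
  return {k: v for d in (metadata_dicts or []) for k, v in d.items()}


# Example (illustrative values):
# flatten_metadata([{'enable-oslogin': 'true'},
#                   {'block-project-ssh-keys': 'false'}])
# -> {'enable-oslogin': 'true', 'block-project-ssh-keys': 'false'}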
def Args(cls, parser):
  dataproc = dp.Dataproc(cls.ReleaseTrack())
  flags.AddClusterResourceArg(parser, 'export', dataproc.api_version)
  export_util.AddExportFlags(parser, cls.GetSchemaPath(for_help=True))
def Args(cls, parser):
  dataproc = dp.Dataproc(cls.ReleaseTrack())
  flags.AddJobResourceArg(parser, 'retrieve the policy for',
                          dataproc.api_version)
  base.URI_FLAG.RemoveFromParser(parser)
def Args(cls, parser):
  dataproc = dp.Dataproc(cls.ReleaseTrack())
  flags.AddTemplateResourceArg(parser, 'set the policy on',
                               dataproc.api_version)
  iam_util.AddArgForPolicyFile(parser)
def Args(cls, parser):
  dataproc = dp.Dataproc(cls.ReleaseTrack())
  flags.AddTemplateResourceArg(parser, 'describe', dataproc.api_version)
  flags.AddVersionFlag(parser)
def Run(self, args):
  return _Run(dp.Dataproc(self.ReleaseTrack()), args)
def Args(cls, parser):
  flags.AddTimeoutFlag(parser)
  dataproc = dp.Dataproc(cls.ReleaseTrack())
  flags.AddClusterResourceArg(parser, 'diagnose', dataproc.api_version)
def Args(cls, parser):
  base.ASYNC_FLAG.AddToParser(parser)
  flags.AddTimeoutFlag(parser)
  dataproc = dp.Dataproc(cls.ReleaseTrack())
  flags.AddClusterResourceArg(parser, 'stop', dataproc.api_version)
def Args(cls, parser):
  dataproc = dp.Dataproc(cls.ReleaseTrack())
  base.ASYNC_FLAG.AddToParser(parser)
  # Allow the user to specify new labels as well as update/remove existing
  # ones.
  labels_util.AddUpdateLabelsFlags(parser)
  # Updates can take hours if a lot of data needs to be moved on HDFS.
  flags.AddTimeoutFlag(parser, default='3h')
  flags.AddClusterResourceArg(parser, 'update', dataproc.api_version)
  parser.add_argument(
      '--num-workers',
      type=int,
      help='The new number of worker nodes in the cluster.')
  parser.add_argument(
      '--num-preemptible-workers',
      type=int,
      help='The new number of preemptible worker nodes in the cluster.')

  parser.add_argument(
      '--graceful-decommission-timeout',
      type=arg_parsers.Duration(lower_bound='0s', upper_bound='1d'),
      help="""\
          The graceful decommission timeout for decommissioning Node Managers
          in the cluster, used when removing nodes. Graceful decommissioning
          allows removing nodes from the cluster without interrupting jobs in
          progress. The timeout specifies how long to wait for jobs in progress
          to finish before forcefully removing nodes (and potentially
          interrupting jobs). The timeout defaults to 0 if not set (for
          forceful decommission), and the maximum allowed timeout is 1 day.
          See $ gcloud topic datetimes for information on duration formats.
          """)

  idle_delete_group = parser.add_mutually_exclusive_group()
  idle_delete_group.add_argument(
      '--max-idle',
      type=arg_parsers.Duration(),
      help="""\
          The duration before the cluster is auto-deleted after the last job
          finishes, such as "2h" or "1d".
          See $ gcloud topic datetimes for information on duration formats.
          """)
  idle_delete_group.add_argument(
      '--no-max-idle',
      action='store_true',
      help="""\
          Cancels the cluster auto-deletion by cluster idle duration
          (configured by the --max-idle flag).
          """)

  auto_delete_group = parser.add_mutually_exclusive_group()
  auto_delete_group.add_argument(
      '--max-age',
      type=arg_parsers.Duration(),
      help="""\
          The lifespan of the cluster before it is auto-deleted, such as
          "2h" or "1d".
          See $ gcloud topic datetimes for information on duration formats.
          """)
  auto_delete_group.add_argument(
      '--expiration-time',
      type=arg_parsers.Datetime.Parse,
      help="""\
          The time when the cluster will be auto-deleted, such as
          "2017-08-29T18:52:51.142Z".
          See $ gcloud topic datetimes for information on time formats.
          """)
  auto_delete_group.add_argument(
      '--no-max-age',
      action='store_true',
      help="""\
          Cancels the cluster auto-deletion by maximum cluster age (configured
          by the --max-age or --expiration-time flags).
          """)

  # Can only specify one of --autoscaling-policy or --disable-autoscaling.
  autoscaling_group = parser.add_mutually_exclusive_group()
  flags.AddAutoscalingPolicyResourceArgForCluster(
      autoscaling_group, api_version='v1')
  autoscaling_group.add_argument(
      '--disable-autoscaling',
      action='store_true',
      help="""\
          Disable autoscaling, if it is enabled. This is an alias for passing
          the empty string to --autoscaling-policy.
          """)
def Args(cls, parser):
  dataproc = dp.Dataproc(cls.ReleaseTrack())
  flags.AddTemplateResourceArg(parser, 'export', dataproc.api_version)
  export_util.AddExportFlags(parser)
  flags.AddVersionFlag(parser)
def Args(cls, parser):
  dataproc = dp.Dataproc(cls.ReleaseTrack())
  labels_util.AddCreateLabelsFlags(parser)
  workflow_templates.AddDagTimeoutFlag(parser, False)
  flags.AddTemplateResourceArg(parser, 'create', dataproc.api_version)