def Run(self, args):
  """Builds a cluster message from the parsed args and creates the cluster.

  Args:
    args: Parsed command-line arguments. `name`, `timeout` and the `async`
      flag are read here; the remaining values are consumed by the helpers
      this method calls.

  Returns:
    Whatever clusters.CreateCluster returns.
  """
  self.ValidateArgs(args)

  dataproc = dp.Dataproc(self.ReleaseTrack())
  cluster_ref = util.ParseCluster(args.name, dataproc)
  compute_resources = compute_helpers.GetComputeResources(
      self.ReleaseTrack(), args.name)

  cluster_config = clusters.GetClusterConfig(
      args,
      dataproc,
      cluster_ref.projectId,
      compute_resources,
      self.BETA,
      include_ttl_config=True)

  cluster = dataproc.messages.Cluster(
      config=cluster_config,
      clusterName=cluster_ref.clusterName,
      projectId=cluster_ref.projectId)

  self.ConfigureCluster(dataproc.messages, args, cluster)

  # `async` is a reserved word from Python 3.7 on, so `args.async` no longer
  # parses. Reading the attribute by name works on every Python version.
  is_async = getattr(args, 'async')
  return clusters.CreateCluster(dataproc, cluster, is_async, args.timeout)
def Run(self, args):
  """Deletes the named cluster after prompting the user for confirmation.

  Args:
    args: Parsed command-line arguments; `name` and the `async` flag are
      read here.

  Returns:
    The delete operation: as returned by the API when the async flag is set,
    otherwise as returned by util.WaitForOperation.
  """
  client = self.context['dataproc_client']
  messages = self.context['dataproc_messages']

  cluster_ref = util.ParseCluster(args.name, self.context)

  request = messages.DataprocProjectsRegionsClustersDeleteRequest(
      clusterName=cluster_ref.clusterName,
      region=cluster_ref.region,
      projectId=cluster_ref.projectId)

  console_io.PromptContinue(
      message="The cluster '{0}' and all attached disks will be "
      'deleted.'.format(args.name),
      cancel_on_no=True,
      cancel_string='Deletion aborted by user.')

  operation = client.projects_regions_clusters.Delete(request)

  # `async` is a reserved word from Python 3.7 on, so `args.async` no longer
  # parses. Reading the attribute by name works on every Python version.
  if getattr(args, 'async'):
    log.status.write('Deleting [{0}] with operation [{1}].'.format(
        cluster_ref, operation.name))
    return operation

  operation = util.WaitForOperation(
      operation, self.context, 'Waiting for cluster deletion operation')
  log.DeletedResource(cluster_ref)

  return operation
def Run(self, args):
  """Deletes the named cluster after prompting the user for confirmation.

  Args:
    args: Parsed command-line arguments; `name`, `timeout` and the `async`
      flag are read here.

  Returns:
    The delete operation: as returned by the API when the async flag is set,
    otherwise as returned by util.WaitForOperation.
  """
  dataproc = dp.Dataproc(self.ReleaseTrack())

  cluster_ref = util.ParseCluster(args.name, dataproc)

  request = dataproc.messages.DataprocProjectsRegionsClustersDeleteRequest(
      clusterName=cluster_ref.clusterName,
      region=cluster_ref.region,
      projectId=cluster_ref.projectId)

  console_io.PromptContinue(
      message="The cluster '{0}' and all attached disks will be "
      'deleted.'.format(args.name),
      cancel_on_no=True,
      cancel_string='Deletion aborted by user.')

  operation = dataproc.client.projects_regions_clusters.Delete(request)

  # `async` is a reserved word from Python 3.7 on, so `args.async` no longer
  # parses. Reading the attribute by name works on every Python version.
  if getattr(args, 'async'):
    log.status.write('Deleting [{0}] with operation [{1}].'.format(
        cluster_ref, operation.name))
    return operation

  operation = util.WaitForOperation(
      dataproc,
      operation,
      message='Waiting for cluster deletion operation',
      timeout_s=args.timeout)
  log.DeletedResource(cluster_ref)

  return operation
def Run(self, args):
  """Fetches a cluster and exports it to a file or to standard output.

  Args:
    args: Parsed command-line arguments; `name` and `destination` are read.
  """
  dataproc = dp.Dataproc(self.ReleaseTrack())
  ref = dp_util.ParseCluster(args.name, dataproc)

  fetched = dataproc.client.projects_regions_clusters.Get(
      dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
          projectId=ref.projectId,
          region=ref.region,
          clusterName=ref.clusterName))

  # Filter out Dataproc-generated labels.
  clusters.DeleteGeneratedLabels(fetched, dataproc)

  schema = self.GetSchemaPath()

  # Without a destination the export goes straight to stdout.
  if not args.destination:
    export_util.Export(message=fetched, stream=sys.stdout, schema_path=schema)
    return

  with files.FileWriter(args.destination) as out:
    export_util.Export(message=fetched, stream=out, schema_path=schema)
def Run(self, args):
  """Stages job files, submits the job and, unless async, waits for it.

  Args:
    args: Parsed command-line arguments; `id`, `cluster`, `bucket`,
      `max_failures_per_hour` and `async_` are read here.

  Returns:
    The submitted job, or the job returned by util.WaitForJobTermination
    when the command waits for completion.
  """
  dataproc = dp.Dataproc(self.ReleaseTrack())
  msgs = dataproc.messages

  request_id = util.GetUniqueId()
  # An explicit --id wins; otherwise the request id doubles as the job id.
  job_id = args.id or request_id

  # Don't use ResourceArgument, because --id is hidden by default
  job_ref = util.ParseJob(job_id, dataproc)

  self.PopulateFilesByType(args)

  cluster_ref = util.ParseCluster(args.cluster, dataproc)
  cluster = dataproc.client.projects_regions_clusters.Get(
      msgs.DataprocProjectsRegionsClustersGetRequest(
          projectId=cluster_ref.projectId,
          region=cluster_ref.region,
          clusterName=cluster_ref.clusterName))

  self._staging_dir = self.GetStagingDir(
      cluster, job_ref.jobId, bucket=args.bucket)
  self.ValidateAndStageFiles()

  reference = msgs.JobReference(
      projectId=job_ref.projectId, jobId=job_ref.jobId)
  placement = msgs.JobPlacement(clusterName=args.cluster)
  job = msgs.Job(reference=reference, placement=placement)
  self.ConfigureJob(msgs, job, args)

  if args.max_failures_per_hour:
    job.scheduling = msgs.JobScheduling(
        maxFailuresPerHour=args.max_failures_per_hour)

  submit_request = msgs.DataprocProjectsRegionsJobsSubmitRequest(
      projectId=job_ref.projectId,
      region=job_ref.region,
      submitJobRequest=msgs.SubmitJobRequest(job=job, requestId=request_id))
  job = dataproc.client.projects_regions_jobs.Submit(submit_request)

  log.status.Print('Job [{0}] submitted.'.format(job_id))

  if args.async_:
    return job

  states = msgs.JobStatus.StateValueValuesEnum
  job = util.WaitForJobTermination(
      dataproc,
      job,
      job_ref,
      message='Waiting for job completion',
      goal_state=states.DONE,
      error_state=states.ERROR,
      stream_driver_log=True)
  log.status.Print('Job [{0}] finished successfully.'.format(job_id))
  return job
def Run(self, args):
  """Imports a cluster definition from a file or stdin and creates it.

  Args:
    args: Parsed command-line arguments; `file`, `async_` and `timeout` are
      read here.

  Returns:
    Whatever clusters.CreateCluster returns.
  """
  dataproc = dp.Dataproc(self.ReleaseTrack())

  # '-' is passed to the reader when no file was given.
  source = args.file or '-'
  contents = console_io.ReadFromFileOrStdin(source, binary=False)

  imported = export_util.Import(
      message_type=dataproc.messages.Cluster, stream=contents)
  ref = util.ParseCluster(imported.clusterName, dataproc)

  return clusters.CreateCluster(
      dataproc, ref, imported, args.async_, args.timeout)
def Run(self, args):
  """Fetches and returns the cluster named by `args.name`."""
  api = self.context['dataproc_client']
  ref = util.ParseCluster(args.name, self.context)
  return api.projects_clusters.Get(ref.Request())
def Run(self, args):
  """Returns the IAM policy of the cluster named by `args.name`."""
  dataproc = dp.Dataproc(self.ReleaseTrack())
  request_type = (
      dataproc.messages.DataprocProjectsRegionsClustersGetIamPolicyRequest)

  ref = util.ParseCluster(args.name, dataproc)
  policy_request = request_type(resource=ref.RelativeName())

  return dataproc.client.projects_regions_clusters.GetIamPolicy(
      policy_request)
def Run(self, args):
  """Patches the worker counts of a cluster and returns the updated cluster.

  Args:
    args: Parsed command-line arguments; `name`, `new_num_workers`,
      `num_workers` and `num_preemptible_workers` are read here. When
      `new_num_workers` is set it is copied onto `args.num_workers`.

  Returns:
    The cluster as fetched after the patch operation has been waited on.

  Raises:
    exceptions.ToolException: If none of the supported parameters was given.
  """
  client = self.context['dataproc_client']
  messages = self.context['dataproc_messages']

  cluster_ref = util.ParseCluster(args.name, self.context)

  cluster_config = messages.ClusterConfiguration()
  update_paths = []

  if args.new_num_workers is not None:
    log.warn(
        '--new-num-workers parameter is deprecated and will be removed '
        'in a future release. Please use --num-workers instead')
    args.num_workers = args.new_num_workers

  if args.num_workers is not None:
    cluster_config.workerConfiguration = messages.InstanceGroupConfiguration(
        numInstances=args.num_workers)
    update_paths.append('configuration.worker_configuration.num_instances')

  if args.num_preemptible_workers is not None:
    cluster_config.secondaryWorkerConfiguration = (
        messages.InstanceGroupConfiguration(
            numInstances=args.num_preemptible_workers))
    update_paths.append(
        'configuration.secondary_worker_configuration.num_instances')

  # Every supported change records a mask path above, so an empty list means
  # the user asked for no change at all.
  if not update_paths:
    raise exceptions.ToolException(
        'Must specify at least one cluster parameter to update.')

  patched = messages.Cluster(
      configuration=cluster_config,
      clusterName=cluster_ref.clusterName,
      projectId=cluster_ref.projectId)

  patch_request = messages.DataprocProjectsClustersPatchRequest(
      clusterName=cluster_ref.clusterName,
      projectId=cluster_ref.projectId,
      cluster=patched,
      updateMask=','.join(update_paths))

  operation = client.projects_clusters.Patch(patch_request)

  util.WaitForOperation(
      operation,
      self.context,
      message='Waiting for cluster update operation',
      timeout_s=3600 * 3)

  refreshed = client.projects_clusters.Get(cluster_ref.Request())
  log.UpdatedResource(cluster_ref)
  return refreshed
def CreateCluster(dataproc, cluster, is_async, timeout):
  """Issues a cluster create request and optionally waits for it to finish.

  Args:
    dataproc: Dataproc object that contains client, messages, and resources
    cluster: Cluster to create
    is_async: If true, return right after the request has been issued
      instead of waiting for the operation to complete
    timeout: Timeout used when waiting for the operation to complete

  Returns:
    The cluster as fetched after the operation was waited on, or None if
    is_async is true.
  """
  # Get project id and region.
  ref = util.ParseCluster(cluster.clusterName, dataproc)
  messages = dataproc.messages
  cluster_service = dataproc.client.projects_regions_clusters

  create_request = messages.DataprocProjectsRegionsClustersCreateRequest(
      cluster=cluster,
      projectId=ref.projectId,
      region=ref.region,
      requestId=util.GetUniqueId())
  operation = cluster_service.Create(create_request)

  if is_async:
    log.status.write('Creating [{0}] with operation [{1}].'.format(
        ref, operation.name))
    return

  operation = util.WaitForOperation(
      dataproc,
      operation,
      message='Waiting for cluster creation operation',
      timeout_s=timeout)

  created = cluster_service.Get(
      messages.DataprocProjectsRegionsClustersGetRequest(
          projectId=ref.projectId,
          region=ref.region,
          clusterName=ref.clusterName))

  running = messages.ClusterStatus.StateValueValuesEnum.RUNNING
  is_running = created.status.state == running

  if not is_running:
    log.error('Create cluster failed!')
    if operation.details:
      log.error('Details:\n' + operation.details)
    return created

  # The short zone name is the last path segment of the zone URI.
  zone_short_name = created.config.gceClusterConfig.zoneUri.split('/')[-1]

  # Log the URL of the cluster
  log.CreatedResource(
      ref,
      # Also indicate which zone the cluster was placed in. This is helpful
      # if the server picked a zone (auto zone)
      details='Cluster placed in zone [{0}]'.format(zone_short_name))
  return created
def Run(self, args):
  """Fetches and returns the cluster named by `args.name`."""
  api = self.context['dataproc_client']
  ref = util.ParseCluster(args.name, self.context)

  get_request = api.MESSAGES_MODULE.DataprocProjectsRegionsClustersGetRequest(
      projectId=ref.projectId,
      region=ref.region,
      clusterName=ref.clusterName)

  return api.projects_regions_clusters.Get(get_request)
def Run(self, args):
  """Fetches and returns the cluster named by `args.name`."""
  dataproc = dp.Dataproc(self.ReleaseTrack())
  ref = util.ParseCluster(args.name, dataproc)

  get_request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
      projectId=ref.projectId,
      region=ref.region,
      clusterName=ref.clusterName)

  return dataproc.client.projects_regions_clusters.Get(get_request)
def Run(self, args):
  """This is what gets called when the user runs this command.

  Looks up the target cluster, stages the job files, submits the job and,
  unless the async flag is set, waits for the job to terminate.

  Args:
    args: Parsed command-line arguments; `id`, `cluster` and the `async`
      flag are read here.

  Returns:
    The submitted job, or the job returned by util.WaitForJobTermination
    when the command waits for completion.

  Raises:
    exceptions.HttpException: If the cluster lookup or the job submission
      raises an apitools HttpError.
  """
  client = self.context['dataproc_client']
  messages = self.context['dataproc_messages']

  job_id = util.GetJobId(args.id)
  job_ref = util.ParseJob(job_id, self.context)

  self.PopulateFilesByType(args)

  cluster_ref = util.ParseCluster(args.cluster, self.context)
  request = messages.DataprocProjectsRegionsClustersGetRequest(
      projectId=cluster_ref.projectId,
      region=cluster_ref.region,
      clusterName=cluster_ref.clusterName)

  try:
    cluster = client.projects_regions_clusters.Get(request)
  except apitools_exceptions.HttpError as error:
    raise exceptions.HttpException(error)

  self._staging_dir = self.GetStagingDir(cluster, job_ref.jobId)
  self.ValidateAndStageFiles()

  job = messages.Job(
      reference=messages.JobReference(
          projectId=job_ref.projectId,
          jobId=job_ref.jobId),
      placement=messages.JobPlacement(clusterName=args.cluster))

  self.ConfigureJob(job, args)

  request = messages.DataprocProjectsRegionsJobsSubmitRequest(
      projectId=job_ref.projectId,
      region=job_ref.region,
      submitJobRequest=messages.SubmitJobRequest(job=job))

  try:
    job = client.projects_regions_jobs.Submit(request)
  except apitools_exceptions.HttpError as error:
    raise exceptions.HttpException(error)

  log.status.Print('Job [{0}] submitted.'.format(job_id))

  # `async` is a reserved word from Python 3.7 on, so `args.async` no longer
  # parses. Reading the attribute by name works on every Python version.
  if not getattr(args, 'async'):
    job = util.WaitForJobTermination(
        job,
        self.context,
        message='Waiting for job completion',
        goal_state=messages.JobStatus.StateValueValuesEnum.DONE,
        stream_driver_log=True)
    log.status.Print('Job [{0}] finished successfully.'.format(job_id))

  return job
def Run(self, args):
  """Sets the IAM policy read from `args.policy_file` on a cluster.

  Args:
    args: Parsed command-line arguments; `policy_file` and `cluster` are
      read here.

  Returns:
    The result of the SetIamPolicy API call.
  """
  dataproc = dp.Dataproc(self.ReleaseTrack())
  msgs = dataproc.messages

  parsed_policy = iam_util.ParsePolicyFile(args.policy_file, msgs.Policy)
  policy_payload = msgs.SetIamPolicyRequest(policy=parsed_policy)

  ref = util.ParseCluster(args.cluster, dataproc)

  return dataproc.client.projects_regions_clusters.SetIamPolicy(
      msgs.DataprocProjectsRegionsClustersSetIamPolicyRequest(
          resource=ref.RelativeName(),
          setIamPolicyRequest=policy_payload))
def Run(self, args):
  """This is what gets called when the user runs this command.

  Looks up the target cluster, stages the job files, submits the job and,
  unless the async flag is set, waits for the job to terminate.

  Args:
    args: Parsed command-line arguments; `id`, `cluster`, `bucket` and the
      `async` flag are read here.

  Returns:
    The submitted job, or the job returned by util.WaitForJobTermination
    when the command waits for completion.
  """
  dataproc = dp.Dataproc(self.ReleaseTrack())

  job_id = util.GetJobId(args.id)
  job_ref = util.ParseJob(job_id, dataproc)

  self.PopulateFilesByType(args)

  cluster_ref = util.ParseCluster(args.cluster, dataproc)
  request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
      projectId=cluster_ref.projectId,
      region=cluster_ref.region,
      clusterName=cluster_ref.clusterName)

  cluster = dataproc.client.projects_regions_clusters.Get(request)

  self._staging_dir = self.GetStagingDir(
      cluster, job_ref.jobId, bucket=args.bucket)
  self.ValidateAndStageFiles()

  job = dataproc.messages.Job(
      reference=dataproc.messages.JobReference(
          projectId=job_ref.projectId,
          jobId=job_ref.jobId),
      placement=dataproc.messages.JobPlacement(
          clusterName=args.cluster))

  self.ConfigureJob(dataproc.messages, job, args)

  request = dataproc.messages.DataprocProjectsRegionsJobsSubmitRequest(
      projectId=job_ref.projectId,
      region=job_ref.region,
      submitJobRequest=dataproc.messages.SubmitJobRequest(
          job=job))

  job = dataproc.client.projects_regions_jobs.Submit(request)

  log.status.Print('Job [{0}] submitted.'.format(job_id))

  # `async` is a reserved word from Python 3.7 on, so `args.async` no longer
  # parses. Reading the attribute by name works on every Python version.
  if not getattr(args, 'async'):
    job = util.WaitForJobTermination(
        dataproc,
        job,
        message='Waiting for job completion',
        goal_state=dataproc.messages.JobStatus.StateValueValuesEnum.DONE,
        stream_driver_log=True)
    log.status.Print('Job [{0}] finished successfully.'.format(job_id))

  return job
def Run(self, args):
  """Imports a cluster definition and creates the cluster under `args.name`.

  Args:
    args: Parsed command-line arguments; `source`, `name`, `timeout` and the
      `async` flag are read here.

  Returns:
    Whatever clusters.CreateCluster returns.
  """
  dataproc = dp.Dataproc(self.ReleaseTrack())
  msgs = dataproc.messages

  # '-' is passed to the reader when no source was given.
  data = console_io.ReadFromFileOrStdin(args.source or '-', binary=False)
  cluster = export_util.Import(
      message_type=msgs.Cluster,
      stream=data,
      schema_path=self.GetSchemaPath())

  # The name and project come from the command line, overriding whatever the
  # imported definition contained.
  cluster_ref = dp_util.ParseCluster(args.name, dataproc)
  cluster.clusterName = cluster_ref.clusterName
  cluster.projectId = cluster_ref.projectId

  # Import only supports create, not update (for now).
  # `async` is a reserved word from Python 3.7 on, so `args.async` no longer
  # parses. Reading the attribute by name works on every Python version.
  is_async = getattr(args, 'async')
  return clusters.CreateCluster(dataproc, cluster, is_async, args.timeout)
def Run(self, args):
  """Runs the diagnose operation on a cluster and prints its output.

  Args:
    args: Parsed command-line arguments; only `name` is read here.

  Returns:
    The `outputUri` value taken from the operation response.

  Raises:
    exceptions.HttpException: If the API call or the wait raises an apitools
      HttpError.
    exceptions.ToolException: If the operation has no response, or the
      response carries no outputUri.
  """
  client = self.context['dataproc_client']
  messages = self.context['dataproc_messages']

  cluster_ref = util.ParseCluster(args.name, self.context)

  request = messages.DataprocProjectsRegionsClustersDiagnoseRequest(
      clusterName=cluster_ref.clusterName,
      region=cluster_ref.region,
      projectId=cluster_ref.projectId)

  try:
    operation = client.projects_regions_clusters.Diagnose(request)
    # TODO(user): Stream output during polling.
    operation = util.WaitForOperation(
        operation,
        self.context,
        message='Waiting for cluster diagnose operation')
    response = operation.response
  except apitools_exceptions.HttpError as error:
    raise exceptions.HttpException(util.FormatHttpError(error))

  if not response:
    raise exceptions.ToolException('Operation is missing response')

  properties = encoding.MessageToDict(response)
  # Use .get() so that an absent key reaches the explicit check below
  # instead of escaping as a bare KeyError.
  output_uri = properties.get('outputUri')

  if not output_uri:
    raise exceptions.ToolException('Response is missing outputUri')

  log.err.Print('Output from diagnostic:')
  log.err.Print('-----------------------------------------------')
  driver_log_stream = storage_helpers.StorageObjectSeriesStream(
      output_uri)
  # A single read might not read whole stream. Try a few times.
  read_retrier = retry.Retryer(max_retrials=4, jitter_ms=None)
  try:
    read_retrier.RetryOnResult(
        lambda: driver_log_stream.ReadIntoWritable(log.err),
        sleep_ms=100,
        should_retry_if=lambda *_: driver_log_stream.open)
  except retry.MaxRetrialsException:
    log.warn(
        'Diagnostic finished succesfully, '
        'but output did not finish streaming.')
  log.err.Print('-----------------------------------------------')
  return output_uri
def Run(self, args):
  """Runs the diagnose operation on a cluster and prints its output.

  Args:
    args: Parsed command-line arguments; `name` and `timeout` are read here.

  Returns:
    The `outputUri` value taken from the operation response.

  Raises:
    exceptions.OperationError: If the operation has no response, or the
      response carries no outputUri.
  """
  dataproc = dp.Dataproc(self.ReleaseTrack())

  cluster_ref = util.ParseCluster(args.name, dataproc)

  request = dataproc.messages.DataprocProjectsRegionsClustersDiagnoseRequest(
      clusterName=cluster_ref.clusterName,
      region=cluster_ref.region,
      projectId=cluster_ref.projectId)

  operation = dataproc.client.projects_regions_clusters.Diagnose(request)
  # TODO(b/36052522): Stream output during polling.
  operation = util.WaitForOperation(
      dataproc,
      operation,
      message='Waiting for cluster diagnose operation',
      timeout_s=args.timeout)

  if not operation.response:
    raise exceptions.OperationError('Operation is missing response')

  properties = encoding.MessageToDict(operation.response)
  # Use .get() so that an absent key reaches the explicit check below
  # instead of escaping as a bare KeyError.
  output_uri = properties.get('outputUri')

  if not output_uri:
    raise exceptions.OperationError('Response is missing outputUri')

  log.err.Print('Output from diagnostic:')
  log.err.Print('-----------------------------------------------')
  driver_log_stream = storage_helpers.StorageObjectSeriesStream(
      output_uri)
  # A single read might not read whole stream. Try a few times.
  read_retrier = retry.Retryer(max_retrials=4, jitter_ms=None)
  try:
    read_retrier.RetryOnResult(
        lambda: driver_log_stream.ReadIntoWritable(log.err),
        sleep_ms=100,
        should_retry_if=lambda *_: driver_log_stream.open)
  except retry.MaxRetrialsException:
    log.warning(
        'Diagnostic finished successfully, '
        'but output did not finish streaming.')
  log.err.Print('-----------------------------------------------')
  return output_uri
def Run(self, args):
  """Reads a YAML cluster definition and creates it under `args.name`.

  Args:
    args: Parsed command-line arguments; `source`, `name`, `timeout` and the
      `async` flag are read here.

  Returns:
    Whatever clusters.CreateCluster returns.
  """
  dataproc = dp.Dataproc(self.ReleaseTrack())
  msgs = dataproc.messages

  if args.source:
    with files.FileReader(args.source) as stream:
      cluster = util.ReadYaml(
          message_type=msgs.Cluster,
          stream=stream,
          schema_path=SCHEMA_PATH)
  else:
    # No source file was given, so the definition is read from stdin.
    cluster = util.ReadYaml(
        message_type=msgs.Cluster,
        stream=sys.stdin,
        schema_path=SCHEMA_PATH)

  # The name and project come from the command line, overriding whatever the
  # imported definition contained.
  cluster_ref = util.ParseCluster(args.name, dataproc)
  cluster.clusterName = cluster_ref.clusterName
  cluster.projectId = cluster_ref.projectId

  # Import only supports create, not update (for now).
  # `async` is a reserved word from Python 3.7 on, so `args.async` no longer
  # parses. Reading the attribute by name works on every Python version.
  is_async = getattr(args, 'async')
  return clusters.CreateCluster(dataproc, cluster, is_async, args.timeout)
def Run(self, args):
  """Deletes the named cluster once the user has confirmed the prompt.

  Args:
    args: Parsed command-line arguments; only `name` is read here.

  Returns:
    The operation returned by util.WaitForOperation.

  Raises:
    exceptions.ToolException: If the confirmation prompt returns a falsy
      value.
  """
  api = self.context['dataproc_client']
  msgs = self.context['dataproc_messages']

  ref = util.ParseCluster(args.name, self.context)
  delete_request = msgs.DataprocProjectsClustersDeleteRequest(
      clusterName=ref.clusterName,
      projectId=ref.projectId)

  confirmed = console_io.PromptContinue(
      message="The cluster '{0}' and all attached disks will be "
      'deleted.'.format(args.name))
  if not confirmed:
    raise exceptions.ToolException('Deletion aborted by user.')

  pending = api.projects_clusters.Delete(delete_request)
  finished = util.WaitForOperation(
      pending, self.context, 'Waiting for cluster deletion operation')

  log.DeletedResource(ref)
  return finished
def Run(self, args):
  """Runs the diagnose operation on a cluster and prints its output.

  Args:
    args: Parsed command-line arguments; only `name` is read here.

  Returns:
    The `outputUri` value taken from the operation response.

  Raises:
    exceptions.HttpException: If the API call or the wait raises an
      apitools_base.HttpError.
    exceptions.ToolException: If the operation has no response, or the
      response carries no outputUri.
  """
  client = self.context['dataproc_client']
  messages = self.context['dataproc_messages']

  cluster_ref = util.ParseCluster(args.name, self.context)

  request = messages.DataprocProjectsClustersDiagnoseRequest(
      clusterName=cluster_ref.clusterName,
      projectId=cluster_ref.projectId)

  try:
    operation = client.projects_clusters.Diagnose(request)
    operation = util.WaitForOperation(
        operation,
        self.context,
        message='Waiting for cluster diagnose operation')
    response = operation.response
  except apitools_base.HttpError as error:
    raise exceptions.HttpException(util.FormatHttpError(error))

  if not response:
    raise exceptions.ToolException('Operation is missing response')

  properties = apitools_base.MessageToDict(response)
  # Use .get() so that an absent key reaches the explicit check below
  # instead of escaping as a bare KeyError.
  output_uri = properties.get('outputUri')

  if not output_uri:
    raise exceptions.ToolException('Response is missing outputUri')

  log.err.Print('Output from diagnostic:')
  log.err.Print('-----------------------------------------------')
  driver_log_stream = storage_helpers.StorageObjectSeriesStream(
      output_uri)
  driver_log_stream.ReadIntoWritable(log.err)
  log.err.Print('-----------------------------------------------')
  return output_uri
def Run(self, args):
  """Builds the full cluster config from the args and creates the cluster.

  Args:
    args: Parsed command-line arguments. `args.timeout` is increased in
      place by the initialization action timeout for every initialization
      action, and `args.properties` gains the allow-zero-workers property
      when `single_node` is set.

  Returns:
    None when the async flag is set; otherwise the cluster as fetched after
    the create operation has been waited on.
  """
  self.ValidateArgs(args)

  dataproc = dp.Dataproc(self.ReleaseTrack())

  cluster_ref = util.ParseCluster(args.name, dataproc)

  compute_resources = compute_helpers.GetComputeResources(
      self.ReleaseTrack(), args.name)

  master_accelerator_type = None
  worker_accelerator_type = None
  master_accelerator_count = None
  worker_accelerator_count = None
  # The accelerator arguments are only read on the BETA release track.
  if self.ReleaseTrack() == base.ReleaseTrack.BETA:
    if args.master_accelerator:
      master_accelerator_type = args.master_accelerator['type']
      master_accelerator_count = args.master_accelerator.get('count', 1)
    if args.worker_accelerator:
      worker_accelerator_type = args.worker_accelerator['type']
      worker_accelerator_count = args.worker_accelerator.get('count', 1)

  # Resolve non-zonal GCE resources
  # We will let the server resolve short names of zonal resources because
  # if auto zone is requested, we will not know the zone before sending the
  # request
  image_ref = args.image and compute_resources.Parse(
      args.image,
      params={'project': cluster_ref.projectId},
      collection='compute.images')
  network_ref = args.network and compute_resources.Parse(
      args.network,
      params={'project': cluster_ref.projectId},
      collection='compute.networks')
  subnetwork_ref = args.subnet and compute_resources.Parse(
      args.subnet,
      params={
          'project': cluster_ref.projectId,
          # NOTE(review): GetOrFail is passed uncalled; presumably Parse
          # resolves callables lazily -- confirm.
          'region': properties.VALUES.compute.region.GetOrFail,
      },
      collection='compute.subnetworks')

  timeout_str = str(args.initialization_action_timeout) + 's'
  init_actions = [
      dataproc.messages.NodeInitializationAction(
          executableFile=exe, executionTimeout=timeout_str)
      for exe in (args.initialization_actions or [])]
  # Increase the client timeout for each initialization action.
  args.timeout += args.initialization_action_timeout * len(init_actions)

  expanded_scopes = compute_helpers.ExpandScopeAliases(args.scopes)

  software_config = dataproc.messages.SoftwareConfig(
      imageVersion=args.image_version)

  # A size given in bytes takes precedence over the *_gb argument.
  master_boot_disk_size_gb = args.master_boot_disk_size_gb
  if args.master_boot_disk_size:
    master_boot_disk_size_gb = (
        api_utils.BytesToGb(args.master_boot_disk_size))

  worker_boot_disk_size_gb = args.worker_boot_disk_size_gb
  if args.worker_boot_disk_size:
    worker_boot_disk_size_gb = (
        api_utils.BytesToGb(args.worker_boot_disk_size))

  preemptible_worker_boot_disk_size_gb = (
      api_utils.BytesToGb(args.preemptible_worker_boot_disk_size))

  if args.single_node:
    args.properties[constants.ALLOW_ZERO_WORKERS_PROPERTY] = 'true'

  if args.properties:
    software_config.properties = encoding.DictToMessage(
        args.properties, dataproc.messages.SoftwareConfig.PropertiesValue)

  gce_cluster_config = dataproc.messages.GceClusterConfig(
      networkUri=network_ref and network_ref.SelfLink(),
      subnetworkUri=subnetwork_ref and subnetwork_ref.SelfLink(),
      internalIpOnly=args.no_address,
      serviceAccount=args.service_account,
      serviceAccountScopes=expanded_scopes,
      zoneUri=properties.VALUES.compute.zone.GetOrFail())

  if args.tags:
    gce_cluster_config.tags = args.tags

  if args.metadata:
    # args.metadata is iterated as a sequence of dicts; merge them into one.
    flat_metadata = dict((k, v) for d in args.metadata for k, v in d.items())
    gce_cluster_config.metadata = encoding.DictToMessage(
        flat_metadata, dataproc.messages.GceClusterConfig.MetadataValue)

  master_accelerators = []
  if master_accelerator_type:
    master_accelerators.append(
        dataproc.messages.AcceleratorConfig(
            acceleratorTypeUri=master_accelerator_type,
            acceleratorCount=master_accelerator_count))
  worker_accelerators = []
  if worker_accelerator_type:
    worker_accelerators.append(
        dataproc.messages.AcceleratorConfig(
            acceleratorTypeUri=worker_accelerator_type,
            acceleratorCount=worker_accelerator_count))

  cluster_config = dataproc.messages.ClusterConfig(
      configBucket=args.bucket,
      gceClusterConfig=gce_cluster_config,
      masterConfig=dataproc.messages.InstanceGroupConfig(
          numInstances=args.num_masters,
          imageUri=image_ref and image_ref.SelfLink(),
          machineTypeUri=args.master_machine_type,
          accelerators=master_accelerators,
          diskConfig=dataproc.messages.DiskConfig(
              bootDiskSizeGb=master_boot_disk_size_gb,
              numLocalSsds=args.num_master_local_ssds,),),
      workerConfig=dataproc.messages.InstanceGroupConfig(
          numInstances=args.num_workers,
          imageUri=image_ref and image_ref.SelfLink(),
          machineTypeUri=args.worker_machine_type,
          accelerators=worker_accelerators,
          diskConfig=dataproc.messages.DiskConfig(
              bootDiskSizeGb=worker_boot_disk_size_gb,
              numLocalSsds=args.num_worker_local_ssds,),),
      initializationActions=init_actions,
      softwareConfig=software_config,)

  # Secondary worker group is optional. However, users may specify
  # future pVM disk size at creation time.
  if (args.num_preemptible_workers is not None or
      preemptible_worker_boot_disk_size_gb is not None):
    cluster_config.secondaryWorkerConfig = (
        dataproc.messages.InstanceGroupConfig(
            numInstances=args.num_preemptible_workers,
            diskConfig=dataproc.messages.DiskConfig(
                bootDiskSizeGb=preemptible_worker_boot_disk_size_gb,
            )))

  cluster = dataproc.messages.Cluster(
      config=cluster_config,
      clusterName=cluster_ref.clusterName,
      projectId=cluster_ref.projectId)

  self.ConfigureCluster(dataproc.messages, args, cluster)

  operation = dataproc.client.projects_regions_clusters.Create(
      dataproc.messages.DataprocProjectsRegionsClustersCreateRequest(
          projectId=cluster_ref.projectId,
          region=cluster_ref.region,
          cluster=cluster))

  # `async` is a reserved word from Python 3.7 on, so `args.async` no longer
  # parses. Reading the attribute by name works on every Python version.
  if getattr(args, 'async'):
    log.status.write(
        'Creating [{0}] with operation [{1}].'.format(
            cluster_ref, operation.name))
    return

  operation = util.WaitForOperation(
      dataproc,
      operation,
      message='Waiting for cluster creation operation',
      timeout_s=args.timeout)

  get_request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
      projectId=cluster_ref.projectId,
      region=cluster_ref.region,
      clusterName=cluster_ref.clusterName)
  cluster = dataproc.client.projects_regions_clusters.Get(get_request)
  if cluster.status.state == (
      dataproc.messages.ClusterStatus.StateValueValuesEnum.RUNNING):

    zone_uri = cluster.config.gceClusterConfig.zoneUri
    zone_short_name = zone_uri.split('/')[-1]

    # Log the URL of the cluster
    log.CreatedResource(
        cluster_ref,
        # Also indicate which zone the cluster was placed in. This is helpful
        # if the server picked a zone (auto zone)
        details='Cluster placed in zone [{0}]'.format(zone_short_name))
  else:
    log.error('Create cluster failed!')
    if operation.details:
      log.error('Details:\n' + operation.details)
  return cluster
def Run(self, args):
  """Patches worker counts, lifecycle settings and labels of a cluster.

  Args:
    args: Parsed command-line arguments; `name`, `num_workers`,
      `num_preemptible_workers`, `update_labels`, `remove_labels`, `timeout`
      and the `async` flag are read here. On the BETA release track
      `max_age`, `expiration_time`, `max_idle` and
      `graceful_decommission_timeout` are read as well.

  Returns:
    None when the async flag is set; otherwise the cluster as fetched after
    the patch operation has been waited on.

  Raises:
    exceptions.ArgumentError: If no updatable parameter was given.
  """
  dataproc = dp.Dataproc(self.ReleaseTrack())

  cluster_ref = util.ParseCluster(args.name, dataproc)

  cluster_config = dataproc.messages.ClusterConfig()
  changed_fields = []

  has_changes = False

  if args.num_workers is not None:
    worker_config = dataproc.messages.InstanceGroupConfig(
        numInstances=args.num_workers)
    cluster_config.workerConfig = worker_config
    changed_fields.append('config.worker_config.num_instances')
    has_changes = True

  if args.num_preemptible_workers is not None:
    worker_config = dataproc.messages.InstanceGroupConfig(
        numInstances=args.num_preemptible_workers)
    cluster_config.secondaryWorkerConfig = worker_config
    changed_fields.append(
        'config.secondary_worker_config.num_instances')
    has_changes = True

  # Lifecycle (auto-delete / idle-delete) settings are only read on BETA.
  if self.ReleaseTrack() == base.ReleaseTrack.BETA:
    lifecycle_config = dataproc.messages.LifecycleConfig()
    changed_config = False
    if args.max_age is not None:
      lifecycle_config.autoDeleteTtl = str(args.max_age) + 's'
      changed_config = True
    if args.expiration_time is not None:
      lifecycle_config.autoDeleteTime = times.FormatDateTime(
          args.expiration_time)
      changed_config = True
    if args.max_idle is not None:
      lifecycle_config.idleDeleteTtl = str(args.max_idle) + 's'
      changed_config = True
    if changed_config:
      cluster_config.lifecycleConfig = lifecycle_config
      changed_fields.append('config.lifecycle_config')
      has_changes = True

  # Update labels if the user requested it
  labels = None
  if args.update_labels or args.remove_labels:
    has_changes = True
    changed_fields.append('labels')

    # We need to fetch cluster first so we know what the labels look like. The
    # labels_util.UpdateLabels will fill out the proto for us with all the
    # updates and removals, but first we need to provide the current state
    # of the labels
    get_cluster_request = (
        dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
            projectId=cluster_ref.projectId,
            region=cluster_ref.region,
            clusterName=cluster_ref.clusterName))
    current_cluster = dataproc.client.projects_regions_clusters.Get(
        get_cluster_request)
    labels = labels_util.UpdateLabels(
        current_cluster.labels,
        dataproc.messages.Cluster.LabelsValue,
        args.update_labels,
        args.remove_labels)

  if not has_changes:
    raise exceptions.ArgumentError(
        'Must specify at least one cluster parameter to update.')

  cluster = dataproc.messages.Cluster(
      config=cluster_config,
      clusterName=cluster_ref.clusterName,
      labels=labels,
      projectId=cluster_ref.projectId)

  request = dataproc.messages.DataprocProjectsRegionsClustersPatchRequest(
      clusterName=cluster_ref.clusterName,
      region=cluster_ref.region,
      projectId=cluster_ref.projectId,
      cluster=cluster,
      updateMask=','.join(changed_fields))

  if (self.ReleaseTrack() == base.ReleaseTrack.BETA and
      args.graceful_decommission_timeout):
    request.gracefulDecommissionTimeout = (
        str(args.graceful_decommission_timeout) + 's')

  operation = dataproc.client.projects_regions_clusters.Patch(request)

  # `async` is a reserved word from Python 3.7 on, so `args.async` no longer
  # parses. Reading the attribute by name works on every Python version.
  if getattr(args, 'async'):
    log.status.write('Updating [{0}] with operation [{1}].'.format(
        cluster_ref, operation.name))
    return

  util.WaitForOperation(
      dataproc,
      operation,
      message='Waiting for cluster update operation',
      timeout_s=args.timeout)

  request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
      projectId=cluster_ref.projectId,
      region=cluster_ref.region,
      clusterName=cluster_ref.clusterName)
  cluster = dataproc.client.projects_regions_clusters.Get(request)
  log.UpdatedResource(cluster_ref)
  return cluster
def Run(self, args):
  """Patches worker counts and labels of a cluster.

  Args:
    args: Parsed command-line arguments; `name`, `num_workers`,
      `num_preemptible_workers`, `update_labels`, `remove_labels` and the
      `async` flag are read here.

  Returns:
    None when the async flag is set; otherwise the cluster as fetched after
    the patch operation has been waited on.

  Raises:
    exceptions.ArgumentError: If no updatable parameter was given.
  """
  client = self.context['dataproc_client']
  messages = self.context['dataproc_messages']

  cluster_ref = util.ParseCluster(args.name, self.context)

  cluster_config = messages.ClusterConfig()
  changed_fields = []

  has_changes = False

  if args.num_workers is not None:
    worker_config = messages.InstanceGroupConfig(
        numInstances=args.num_workers)
    cluster_config.workerConfig = worker_config
    changed_fields.append('config.worker_config.num_instances')
    has_changes = True

  if args.num_preemptible_workers is not None:
    worker_config = messages.InstanceGroupConfig(
        numInstances=args.num_preemptible_workers)
    cluster_config.secondaryWorkerConfig = worker_config
    changed_fields.append(
        'config.secondary_worker_config.num_instances')
    has_changes = True

  # Update labels if the user requested it
  labels = None
  if args.update_labels or args.remove_labels:
    has_changes = True
    changed_fields.append('labels')

    # We need to fetch cluster first so we know what the labels look like. The
    # labels_util.UpdateLabels will fill out the proto for us with all the
    # updates and removals, but first we need to provide the current state
    # of the labels
    get_cluster_request = (
        client.MESSAGES_MODULE.DataprocProjectsRegionsClustersGetRequest(
            projectId=cluster_ref.projectId,
            region=cluster_ref.region,
            clusterName=cluster_ref.clusterName))
    current_cluster = client.projects_regions_clusters.Get(
        get_cluster_request)
    labels = labels_util.UpdateLabels(
        current_cluster.labels,
        messages.Cluster.LabelsValue,
        args.update_labels,
        args.remove_labels)

  if not has_changes:
    raise exceptions.ArgumentError(
        'Must specify at least one cluster parameter to update.')

  cluster = messages.Cluster(
      config=cluster_config,
      clusterName=cluster_ref.clusterName,
      labels=labels,
      projectId=cluster_ref.projectId)

  request = messages.DataprocProjectsRegionsClustersPatchRequest(
      clusterName=cluster_ref.clusterName,
      region=cluster_ref.region,
      projectId=cluster_ref.projectId,
      cluster=cluster,
      updateMask=','.join(changed_fields))

  operation = client.projects_regions_clusters.Patch(request)

  # `async` is a reserved word from Python 3.7 on, so `args.async` no longer
  # parses. Reading the attribute by name works on every Python version.
  if getattr(args, 'async'):
    log.status.write('Updating [{0}] with operation [{1}].'.format(
        cluster_ref, operation.name))
    return

  util.WaitForOperation(
      operation,
      self.context,
      message='Waiting for cluster update operation',
      timeout_s=3600 * 3)

  request = client.MESSAGES_MODULE.DataprocProjectsRegionsClustersGetRequest(
      projectId=cluster_ref.projectId,
      region=cluster_ref.region,
      clusterName=cluster_ref.clusterName)
  cluster = client.projects_regions_clusters.Get(request)
  log.UpdatedResource(cluster_ref)
  return cluster
def Run(self, args):
    """Create a cluster, optionally with accelerators on the BETA track.

    Resolves the Compute Engine resources named on the command line, builds
    the cluster configuration, issues the Create request and, unless async
    was requested, waits for the operation and reports the outcome.

    Args:
        args: Parsed command-line arguments describing the cluster (name,
            image, machine types, network, disks, scopes, properties,
            metadata, tags, initialization actions, worker counts, ``async``).

    Returns:
        The cluster message fetched after the create operation finishes, or
        None when the async flag is set.
    """
    self.ValidateArgs(args)

    client = self.context['dataproc_client']
    messages = self.context['dataproc_messages']

    cluster_ref = util.ParseCluster(args.name, self.context)

    compute_resources = compute_helpers.GetComputeResources(
        self.ReleaseTrack(), args.name)

    main_accelerator_type = None
    worker_accelerator_type = None
    main_accelerator_count = None
    worker_accelerator_count = None
    # Accelerator arguments are only read on the BETA track.
    if self.ReleaseTrack() == base.ReleaseTrack.BETA:
        if args.main_accelerator:
            main_accelerator_type = args.main_accelerator['type']
            main_accelerator_count = args.main_accelerator.get('count', 1)
        if args.worker_accelerator:
            worker_accelerator_type = args.worker_accelerator['type']
            worker_accelerator_count = args.worker_accelerator.get('count', 1)

    # Resolve GCE resources. Each optional reference stays falsy when the
    # corresponding argument was not given.
    zone_ref = compute_resources.Parse(None, collection='compute.zones')
    image_ref = args.image and compute_resources.Parse(
        args.image, collection='compute.images')
    main_machine_type_ref = (
        args.main_machine_type and compute_resources.Parse(
            args.main_machine_type, collection='compute.machineTypes'))
    worker_machine_type_ref = (
        args.worker_machine_type and compute_resources.Parse(
            args.worker_machine_type, collection='compute.machineTypes'))
    network_ref = args.network and compute_resources.Parse(
        args.network, collection='compute.networks')
    subnetwork_ref = args.subnet and compute_resources.Parse(
        args.subnet, collection='compute.subnetworks')
    main_accelerator_type_ref = (
        main_accelerator_type and compute_resources.Parse(
            main_accelerator_type, collection='compute.acceleratorTypes'))
    worker_accelerator_type_ref = (
        worker_accelerator_type and compute_resources.Parse(
            worker_accelerator_type, collection='compute.acceleratorTypes'))

    init_actions = []
    timeout_str = str(args.initialization_action_timeout) + 's'
    if args.initialization_actions:
        init_actions = [
            messages.NodeInitializationAction(
                executableFile=exe, executionTimeout=timeout_str)
            for exe in args.initialization_actions
        ]
    expanded_scopes = compute_helpers.ExpandScopeAliases(args.scopes)

    software_config = messages.SoftwareConfig(
        imageVersion=args.image_version)

    # A byte-size argument, when given, takes precedence over the plain GB
    # value.
    main_boot_disk_size_gb = args.main_boot_disk_size_gb
    if args.main_boot_disk_size:
        main_boot_disk_size_gb = api_utils.BytesToGb(
            args.main_boot_disk_size)

    worker_boot_disk_size_gb = args.worker_boot_disk_size_gb
    if args.worker_boot_disk_size:
        worker_boot_disk_size_gb = api_utils.BytesToGb(
            args.worker_boot_disk_size)

    preemptible_worker_boot_disk_size_gb = api_utils.BytesToGb(
        args.preemptible_worker_boot_disk_size)

    if args.single_node:
        args.properties[constants.ALLOW_ZERO_WORKERS_PROPERTY] = 'true'

    if args.properties:
        software_config.properties = encoding.DictToMessage(
            args.properties, messages.SoftwareConfig.PropertiesValue)

    gce_cluster_config = messages.GceClusterConfig(
        networkUri=network_ref and network_ref.SelfLink(),
        subnetworkUri=subnetwork_ref and subnetwork_ref.SelfLink(),
        serviceAccount=args.service_account,
        serviceAccountScopes=expanded_scopes,
        zoneUri=zone_ref and zone_ref.SelfLink())

    if args.tags:
        gce_cluster_config.tags = args.tags

    if args.metadata:
        # args.metadata is iterated as a sequence of dicts; merge them into
        # a single mapping.
        flat_metadata = dict(
            (k, v) for d in args.metadata for k, v in d.items())
        gce_cluster_config.metadata = encoding.DictToMessage(
            flat_metadata, messages.GceClusterConfig.MetadataValue)

    main_accelerators = []
    if main_accelerator_type:
        main_accelerators.append(
            messages.AcceleratorConfig(
                acceleratorTypeUri=main_accelerator_type_ref and
                main_accelerator_type_ref.SelfLink(),
                acceleratorCount=main_accelerator_count))
    worker_accelerators = []
    if worker_accelerator_type:
        worker_accelerators.append(
            messages.AcceleratorConfig(
                acceleratorTypeUri=worker_accelerator_type_ref and
                worker_accelerator_type_ref.SelfLink(),
                acceleratorCount=worker_accelerator_count))

    cluster_config = messages.ClusterConfig(
        configBucket=args.bucket,
        gceClusterConfig=gce_cluster_config,
        mainConfig=messages.InstanceGroupConfig(
            numInstances=args.num_mains,
            imageUri=image_ref and image_ref.SelfLink(),
            machineTypeUri=main_machine_type_ref and
            main_machine_type_ref.SelfLink(),
            accelerators=main_accelerators,
            diskConfig=messages.DiskConfig(
                bootDiskSizeGb=main_boot_disk_size_gb,
                numLocalSsds=args.num_main_local_ssds,
            ),
        ),
        workerConfig=messages.InstanceGroupConfig(
            numInstances=args.num_workers,
            imageUri=image_ref and image_ref.SelfLink(),
            machineTypeUri=worker_machine_type_ref and
            worker_machine_type_ref.SelfLink(),
            accelerators=worker_accelerators,
            diskConfig=messages.DiskConfig(
                bootDiskSizeGb=worker_boot_disk_size_gb,
                numLocalSsds=args.num_worker_local_ssds,
            ),
        ),
        initializationActions=init_actions,
        softwareConfig=software_config,
    )

    # Secondary worker group is optional. However, users may specify
    # future pVM disk size at creation time.
    if (args.num_preemptible_workers is not None or
            preemptible_worker_boot_disk_size_gb is not None):
        cluster_config.secondaryWorkerConfig = (
            messages.InstanceGroupConfig(
                numInstances=args.num_preemptible_workers,
                diskConfig=messages.DiskConfig(
                    bootDiskSizeGb=preemptible_worker_boot_disk_size_gb,
                )))

    cluster = messages.Cluster(
        config=cluster_config,
        clusterName=cluster_ref.clusterName,
        projectId=cluster_ref.projectId)

    self.ConfigureCluster(messages, args, cluster)

    operation = client.projects_regions_clusters.Create(
        messages.DataprocProjectsRegionsClustersCreateRequest(
            projectId=cluster_ref.projectId,
            region=cluster_ref.region,
            cluster=cluster))

    # `async` is a reserved keyword from Python 3.7 on, so `args.async` no
    # longer parses; getattr reads the very same attribute.
    if getattr(args, 'async'):
        log.status.write('Creating [{0}] with operation [{1}].'.format(
            cluster_ref, operation.name))
        return

    operation = util.WaitForOperation(
        operation, self.context, 'Waiting for cluster creation operation')

    get_request = messages.DataprocProjectsRegionsClustersGetRequest(
        projectId=cluster_ref.projectId,
        region=cluster_ref.region,
        clusterName=cluster_ref.clusterName)
    cluster = client.projects_regions_clusters.Get(get_request)
    if cluster.status.state == (
            messages.ClusterStatus.StateValueValuesEnum.RUNNING):
        log.CreatedResource(cluster_ref)
    else:
        log.error('Create cluster failed!')
        if operation.details:
            log.error('Details:\n' + operation.details)
    return cluster
def Run(self, args):
    """Create a cluster using URIs resolved by the ConfigurationHelper.

    Builds the cluster configuration from the parsed arguments, issues the
    Create request and, unless async was requested, waits for the operation
    and reports the outcome.

    Args:
        args: Parsed command-line arguments describing the cluster (name,
            image, machine types, network, subnet, disks, scopes, properties,
            metadata, tags, initialization actions, worker counts, ``async``).

    Returns:
        The cluster message fetched after the create operation finishes, or
        None when the async flag is set.
    """
    self.ValidateArgs(args)

    client = self.context['dataproc_client']
    messages = self.context['dataproc_messages']

    cluster_ref = util.ParseCluster(args.name, self.context)

    config_helper = compute_helpers.ConfigurationHelper.FromContext(
        self.context)
    compute_uris = config_helper.ResolveGceUris(
        args.name,
        args.image,
        args.master_machine_type,
        args.worker_machine_type,
        args.network,
        args.subnet)

    init_actions = []
    timeout_str = str(args.initialization_action_timeout) + 's'
    if args.initialization_actions:
        init_actions = [
            messages.NodeInitializationAction(
                executableFile=exe, executionTimeout=timeout_str)
            for exe in args.initialization_actions
        ]
    expanded_scopes = compute_helpers.ExpandScopeAliases(args.scopes)

    software_config = messages.SoftwareConfig(
        imageVersion=args.image_version)

    # A byte-size argument, when given, takes precedence over the plain GB
    # value.
    master_boot_disk_size_gb = args.master_boot_disk_size_gb
    if args.master_boot_disk_size:
        master_boot_disk_size_gb = api_utils.BytesToGb(
            args.master_boot_disk_size)

    worker_boot_disk_size_gb = args.worker_boot_disk_size_gb
    if args.worker_boot_disk_size:
        worker_boot_disk_size_gb = api_utils.BytesToGb(
            args.worker_boot_disk_size)

    preemptible_worker_boot_disk_size_gb = api_utils.BytesToGb(
        args.preemptible_worker_boot_disk_size)

    if args.properties:
        software_config.properties = encoding.DictToMessage(
            args.properties, messages.SoftwareConfig.PropertiesValue)

    gce_cluster_config = messages.GceClusterConfig(
        networkUri=compute_uris['network'],
        subnetworkUri=compute_uris['subnetwork'],
        serviceAccountScopes=expanded_scopes,
        zoneUri=compute_uris['zone'])

    if args.tags:
        gce_cluster_config.tags = args.tags

    if args.metadata:
        # args.metadata is iterated as a sequence of dicts; merge them into
        # a single mapping.
        flat_metadata = dict(
            (k, v) for d in args.metadata for k, v in d.items())
        gce_cluster_config.metadata = encoding.DictToMessage(
            flat_metadata, messages.GceClusterConfig.MetadataValue)

    cluster_config = messages.ClusterConfig(
        configBucket=args.bucket,
        gceClusterConfig=gce_cluster_config,
        masterConfig=messages.InstanceGroupConfig(
            numInstances=args.num_masters,
            imageUri=compute_uris['image'],
            machineTypeUri=compute_uris['master_machine_type'],
            diskConfig=messages.DiskConfig(
                bootDiskSizeGb=master_boot_disk_size_gb,
                numLocalSsds=args.num_master_local_ssds,
            ),
        ),
        workerConfig=messages.InstanceGroupConfig(
            numInstances=args.num_workers,
            imageUri=compute_uris['image'],
            machineTypeUri=compute_uris['worker_machine_type'],
            diskConfig=messages.DiskConfig(
                bootDiskSizeGb=worker_boot_disk_size_gb,
                numLocalSsds=args.num_worker_local_ssds,
            ),
        ),
        initializationActions=init_actions,
        softwareConfig=software_config,
    )

    # Secondary worker group is optional. However, users may specify
    # future pVM disk size at creation time.
    if (args.num_preemptible_workers is not None or
            preemptible_worker_boot_disk_size_gb is not None):
        cluster_config.secondaryWorkerConfig = (
            messages.InstanceGroupConfig(
                numInstances=args.num_preemptible_workers,
                diskConfig=messages.DiskConfig(
                    bootDiskSizeGb=preemptible_worker_boot_disk_size_gb,
                )))

    cluster = messages.Cluster(
        config=cluster_config,
        clusterName=cluster_ref.clusterName,
        projectId=cluster_ref.projectId)

    operation = client.projects_regions_clusters.Create(
        messages.DataprocProjectsRegionsClustersCreateRequest(
            projectId=cluster_ref.projectId,
            region=cluster_ref.region,
            cluster=cluster))

    # `async` is a reserved keyword from Python 3.7 on, so `args.async` no
    # longer parses; getattr reads the very same attribute.
    if getattr(args, 'async'):
        log.status.write('Creating [{0}] with operation [{1}].'.format(
            cluster_ref, operation.name))
        return

    operation = util.WaitForOperation(
        operation, self.context, 'Waiting for cluster creation operation')

    cluster = client.projects_regions_clusters.Get(cluster_ref.Request())
    if cluster.status.state == (
            messages.ClusterStatus.StateValueValuesEnum.RUNNING):
        log.CreatedResource(cluster_ref)
    else:
        log.error('Create cluster failed!')
        if operation.details:
            log.error('Details:\n' + operation.details)
    return cluster
def Run(self, args):
    """Create a cluster from a config built by clusters.GetClusterConfig.

    Issues the Create request and, unless async was requested, waits for the
    operation, re-reads the cluster and logs either the created resource
    (with the zone it was placed in) or the failure details.

    Args:
        args: Parsed command-line arguments. Reads ``name``, ``timeout`` and
            ``async`` here; the remaining values are consumed by
            ``clusters.GetClusterConfig`` and ``self.ConfigureCluster``.

    Returns:
        The cluster message fetched after the create operation finishes, or
        None when the async flag is set.
    """
    self.ValidateArgs(args)

    dataproc = dp.Dataproc(self.ReleaseTrack())

    cluster_ref = util.ParseCluster(args.name, dataproc)

    compute_resources = compute_helpers.GetComputeResources(
        self.ReleaseTrack(), args.name)

    beta = self.ReleaseTrack() == base.ReleaseTrack.BETA
    cluster_config = clusters.GetClusterConfig(
        args, dataproc, cluster_ref.projectId, compute_resources, beta)

    cluster = dataproc.messages.Cluster(
        config=cluster_config,
        clusterName=cluster_ref.clusterName,
        projectId=cluster_ref.projectId)

    self.ConfigureCluster(dataproc.messages, args, cluster)

    operation = dataproc.client.projects_regions_clusters.Create(
        dataproc.messages.DataprocProjectsRegionsClustersCreateRequest(
            projectId=cluster_ref.projectId,
            region=cluster_ref.region,
            cluster=cluster))

    # `async` is a reserved keyword from Python 3.7 on, so `args.async` no
    # longer parses; getattr reads the very same attribute.
    if getattr(args, 'async'):
        log.status.write('Creating [{0}] with operation [{1}].'.format(
            cluster_ref, operation.name))
        return

    operation = util.WaitForOperation(
        dataproc,
        operation,
        message='Waiting for cluster creation operation',
        timeout_s=args.timeout)

    get_request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
        projectId=cluster_ref.projectId,
        region=cluster_ref.region,
        clusterName=cluster_ref.clusterName)
    cluster = dataproc.client.projects_regions_clusters.Get(get_request)
    if cluster.status.state == (
            dataproc.messages.ClusterStatus.StateValueValuesEnum.RUNNING):
        zone_uri = cluster.config.gceClusterConfig.zoneUri
        # The short zone name is the last path segment of the zone URI.
        zone_short_name = zone_uri.split('/')[-1]

        # Log the URL of the cluster
        log.CreatedResource(
            cluster_ref,
            # Also indicate which zone the cluster was placed in. This is
            # helpful if the server picked a zone (auto zone)
            details='Cluster placed in zone [{0}]'.format(zone_short_name))
    else:
        log.error('Create cluster failed!')
        if operation.details:
            log.error('Details:\n' + operation.details)
    return cluster
def Run(self, args):
    """Patch a cluster's primary and/or preemptible worker counts.

    The deprecated ``--new-num-workers`` value, when given, is copied onto
    ``args.num_workers`` after emitting a deprecation warning.

    Args:
        args: Parsed command-line arguments. Reads ``name``,
            ``new_num_workers``, ``num_workers``, ``num_preemptible_workers``
            and ``async``.

    Returns:
        The cluster message fetched after the update operation finishes, or
        None when the async flag is set.

    Raises:
        exceptions.ArgumentError: If none of the updatable parameters was
            supplied.
    """
    client = self.context['dataproc_client']
    messages = self.context['dataproc_messages']

    cluster_ref = util.ParseCluster(args.name, self.context)

    cluster_config = messages.ClusterConfig()
    changed_fields = []

    has_changes = False

    if args.new_num_workers is not None:
        log.warn(
            '--new-num-workers parameter is deprecated and will be removed '
            'in a future release. Please use --num-workers instead')
        # The deprecated flag overrides --num-workers when both are set.
        args.num_workers = args.new_num_workers

    if args.num_workers is not None:
        worker_config = messages.InstanceGroupConfig(
            numInstances=args.num_workers)
        cluster_config.workerConfig = worker_config
        changed_fields.append('config.worker_config.num_instances')
        has_changes = True

    if args.num_preemptible_workers is not None:
        worker_config = messages.InstanceGroupConfig(
            numInstances=args.num_preemptible_workers)
        cluster_config.secondaryWorkerConfig = worker_config
        changed_fields.append('config.secondary_worker_config.num_instances')
        has_changes = True

    if not has_changes:
        raise exceptions.ArgumentError(
            'Must specify at least one cluster parameter to update.')

    cluster = messages.Cluster(
        config=cluster_config,
        clusterName=cluster_ref.clusterName,
        projectId=cluster_ref.projectId)

    request = messages.DataprocProjectsRegionsClustersPatchRequest(
        clusterName=cluster_ref.clusterName,
        region=cluster_ref.region,
        projectId=cluster_ref.projectId,
        cluster=cluster,
        updateMask=','.join(changed_fields))

    operation = client.projects_regions_clusters.Patch(request)

    # `async` is a reserved keyword from Python 3.7 on, so `args.async` no
    # longer parses; getattr reads the very same attribute.
    if getattr(args, 'async'):
        log.status.write('Updating [{0}] with operation [{1}].'.format(
            cluster_ref, operation.name))
        return

    util.WaitForOperation(
        operation,
        self.context,
        message='Waiting for cluster update operation',
        timeout_s=3600 * 3)

    # Re-read the cluster so the caller gets its post-update state.
    request = client.MESSAGES_MODULE.DataprocProjectsRegionsClustersGetRequest(
        projectId=cluster_ref.projectId,
        region=cluster_ref.region,
        clusterName=cluster_ref.clusterName)
    cluster = client.projects_regions_clusters.Get(request)
    log.UpdatedResource(cluster_ref)
    return cluster
def Run(self, args):
    """Patch a cluster's workers, autoscaling, lifecycle and/or labels.

    Only the fields the user asked to change are placed in the request; the
    update mask sent with the Patch call lists exactly those fields.

    Args:
        args: Parsed command-line arguments. Reads ``name``, ``num_workers``,
            ``num_preemptible_workers``, ``graceful_decommission_timeout``,
            ``timeout`` and ``async``; on the BETA track also the autoscaling
            and lifecycle arguments. Label arguments are consumed by
            ``labels_util.ProcessUpdateArgsLazy``.

    Returns:
        The cluster message fetched after the update operation finishes, or
        None when the async flag is set.

    Raises:
        exceptions.ArgumentError: If none of the updatable parameters was
            supplied.
    """
    dataproc = dp.Dataproc(self.ReleaseTrack())

    cluster_ref = util.ParseCluster(args.name, dataproc)

    cluster_config = dataproc.messages.ClusterConfig()
    changed_fields = []

    has_changes = False

    if args.num_workers is not None:
        worker_config = dataproc.messages.InstanceGroupConfig(
            numInstances=args.num_workers)
        cluster_config.workerConfig = worker_config
        changed_fields.append('config.worker_config.num_instances')
        has_changes = True

    if args.num_preemptible_workers is not None:
        worker_config = dataproc.messages.InstanceGroupConfig(
            numInstances=args.num_preemptible_workers)
        cluster_config.secondaryWorkerConfig = worker_config
        changed_fields.append('config.secondary_worker_config.num_instances')
        has_changes = True

    if self.ReleaseTrack() == base.ReleaseTrack.BETA:
        if args.autoscaling_policy:
            cluster_config.autoscalingConfig = (
                dataproc.messages.AutoscalingConfig(
                    policyUri=args.CONCEPTS.autoscaling_policy.Parse()
                    .RelativeName()))
            changed_fields.append('config.autoscaling_config.policy_uri')
            has_changes = True
        elif args.autoscaling_policy == '' or args.disable_autoscaling:  # pylint: disable=g-explicit-bool-comparison
            # Disabling autoscaling. Don't need to explicitly set
            # cluster_config.autoscaling_config to None.
            changed_fields.append('config.autoscaling_config.policy_uri')
            has_changes = True

        # NOTE(review): the lifecycle arguments are assumed to be BETA-only,
        # like the autoscaling ones above - confirm against the flag
        # definitions for this command.
        lifecycle_config = dataproc.messages.LifecycleConfig()
        changed_config = False
        if args.max_age is not None:
            lifecycle_config.autoDeleteTtl = str(args.max_age) + 's'
            changed_fields.append('config.lifecycle_config.auto_delete_ttl')
            changed_config = True
        if args.expiration_time is not None:
            lifecycle_config.autoDeleteTime = times.FormatDateTime(
                args.expiration_time)
            changed_fields.append('config.lifecycle_config.auto_delete_time')
            changed_config = True
        if args.max_idle is not None:
            lifecycle_config.idleDeleteTtl = str(args.max_idle) + 's'
            changed_fields.append('config.lifecycle_config.idle_delete_ttl')
            changed_config = True
        # The "no_*" flags list the field in the mask while leaving its
        # value unset.
        if args.no_max_age:
            lifecycle_config.autoDeleteTtl = None
            changed_fields.append('config.lifecycle_config.auto_delete_ttl')
            changed_config = True
        if args.no_max_idle:
            lifecycle_config.idleDeleteTtl = None
            changed_fields.append('config.lifecycle_config.idle_delete_ttl')
            changed_config = True
        if changed_config:
            cluster_config.lifecycleConfig = lifecycle_config
            has_changes = True

    # Put in a thunk so we only make this call if needed
    def _GetCurrentLabels():
        # We need to fetch cluster first so we know what the labels look
        # like. The labels_util will fill out the proto for us with all the
        # updates and removals, but first we need to provide the current
        # state of the labels
        get_cluster_request = (
            dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
                projectId=cluster_ref.projectId,
                region=cluster_ref.region,
                clusterName=cluster_ref.clusterName))
        current_cluster = dataproc.client.projects_regions_clusters.Get(
            get_cluster_request)
        return current_cluster.labels

    labels_update = labels_util.ProcessUpdateArgsLazy(
        args,
        dataproc.messages.Cluster.LabelsValue,
        orig_labels_thunk=_GetCurrentLabels)
    if labels_update.needs_update:
        has_changes = True
        changed_fields.append('labels')
    labels = labels_update.GetOrNone()

    if not has_changes:
        raise exceptions.ArgumentError(
            'Must specify at least one cluster parameter to update.')

    cluster = dataproc.messages.Cluster(
        config=cluster_config,
        clusterName=cluster_ref.clusterName,
        labels=labels,
        projectId=cluster_ref.projectId)

    request = dataproc.messages.DataprocProjectsRegionsClustersPatchRequest(
        clusterName=cluster_ref.clusterName,
        region=cluster_ref.region,
        projectId=cluster_ref.projectId,
        cluster=cluster,
        updateMask=','.join(changed_fields),
        requestId=util.GetUniqueId())

    if args.graceful_decommission_timeout is not None:
        request.gracefulDecommissionTimeout = (
            str(args.graceful_decommission_timeout) + 's')

    operation = dataproc.client.projects_regions_clusters.Patch(request)

    # `async` is a reserved keyword from Python 3.7 on, so `args.async` no
    # longer parses; getattr reads the very same attribute.
    if getattr(args, 'async'):
        log.status.write('Updating [{0}] with operation [{1}].'.format(
            cluster_ref, operation.name))
        return

    util.WaitForOperation(
        dataproc,
        operation,
        message='Waiting for cluster update operation',
        timeout_s=args.timeout)

    # Re-read the cluster so the caller gets its post-update state.
    request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
        projectId=cluster_ref.projectId,
        region=cluster_ref.region,
        clusterName=cluster_ref.clusterName)
    cluster = dataproc.client.projects_regions_clusters.Get(request)
    log.UpdatedResource(cluster_ref)
    return cluster
def Run(self, args):
    """Create a cluster through the projects_clusters API and wait for it.

    Args:
        args: Parsed command-line arguments describing the cluster (name,
            image, machine types, network, bucket, disks, scopes,
            initialization actions, image version, worker counts).

    Returns:
        The cluster message fetched after the create operation finishes.
    """
    dataproc_client = self.context['dataproc_client']
    msgs = self.context['dataproc_messages']

    ref = util.ParseCluster(args.name, self.context)

    helper = compute_helpers.ConfigurationHelper.FromContext(self.context)
    uris = helper.ResolveGceUris(
        args.name,
        args.image,
        args.master_machine_type,
        args.worker_machine_type,
        args.network)

    # One NodeInitializationAction per executable, all sharing one timeout.
    action_timeout = str(args.initialization_action_timeout) + 's'
    actions = []
    for executable in args.initialization_actions or []:
        actions.append(
            msgs.NodeInitializationAction(
                executableFile=executable,
                executionTimeout=action_timeout))

    scopes = compute_helpers.ExpandScopeAliases(args.scopes)

    # Assemble the configuration piece by piece.
    bucket = args.bucket
    gce_configuration = msgs.GceClusterConfiguration(
        networkUri=uris['network'],
        serviceAccountScopes=scopes,
        zoneUri=uris['zone'],
    )
    master_configuration = msgs.InstanceGroupConfiguration(
        imageUri=uris['image'],
        machineTypeUri=uris['master_machine_type'],
        diskConfiguration=msgs.DiskConfiguration(
            bootDiskSizeGb=args.master_boot_disk_size_gb,
            numLocalSsds=args.num_master_local_ssds,
        ),
    )
    worker_configuration = msgs.InstanceGroupConfiguration(
        numInstances=args.num_workers,
        imageUri=uris['image'],
        machineTypeUri=uris['worker_machine_type'],
        diskConfiguration=msgs.DiskConfiguration(
            bootDiskSizeGb=args.worker_boot_disk_size_gb,
            numLocalSsds=args.num_worker_local_ssds,
        ),
    )
    software_configuration = msgs.SoftwareConfiguration(
        imageVersion=args.image_version)

    configuration = msgs.ClusterConfiguration(
        configurationBucket=bucket,
        gceClusterConfiguration=gce_configuration,
        masterConfiguration=master_configuration,
        workerConfiguration=worker_configuration,
        initializationActions=actions,
        softwareConfiguration=software_configuration,
    )

    # The secondary (preemptible) group is attached only on request.
    preemptible_count = args.num_preemptible_workers
    if preemptible_count is not None:
        configuration.secondaryWorkerConfiguration = (
            msgs.InstanceGroupConfiguration(numInstances=preemptible_count))

    new_cluster = msgs.Cluster(
        configuration=configuration,
        clusterName=ref.clusterName,
        projectId=ref.projectId)

    create_op = dataproc_client.projects_clusters.Create(new_cluster)
    create_op = util.WaitForOperation(
        create_op, self.context, 'Waiting for cluster creation operation')

    # Re-read the cluster to learn the state it ended up in.
    result = dataproc_client.projects_clusters.Get(ref.Request())
    running = msgs.ClusterStatus.StateValueValuesEnum.RUNNING
    if result.status.state == running:
        log.CreatedResource(ref)
        return result

    log.error('Create cluster failed!')
    if create_op.details:
        log.error('Details:\n' + create_op.details)
    return result