def Run(self, args):
    """Set a managed-cluster placement on a workflow template.

    The template is looked up from the parsed `template` concept at
    `args.version`; a ManagedCluster built from the command-line flags is
    installed as its placement and the template is sent through Update.

    Args:
        args: Parsed command-line namespace.

    Returns:
        The response of the workflow template Update call.
    """
    dataproc = dp.Dataproc(self.ReleaseTrack())
    template_ref = args.CONCEPTS.template.Parse()
    workflow_template = dataproc.GetRegionsWorkflowTemplate(
        template_ref, args.version)

    # A truthy args.cluster_name wins; otherwise the template ID is reused.
    cluster_name = args.cluster_name or template_ref.workflowTemplatesId

    compute_resources = compute_helpers.GetComputeResources(
        self.ReleaseTrack(), cluster_name)
    is_beta = self.ReleaseTrack() == base.ReleaseTrack.BETA
    cluster_config = clusters.GetClusterConfig(
        args, dataproc, template_ref.projectsId, compute_resources, is_beta)

    messages = dataproc.messages
    workflow_template.placement = messages.WorkflowTemplatePlacement(
        managedCluster=messages.ManagedCluster(
            clusterName=cluster_name,
            config=cluster_config,
            labels=labels_util.ParseCreateArgs(
                args, messages.ManagedCluster.LabelsValue)))

    return dataproc.client.projects_regions_workflowTemplates.Update(
        workflow_template)
def Run(self, args):
    """Set a managed-cluster placement on a workflow template.

    The template reference is parsed from `args.template`; the managed
    cluster is named after the template ID. The updated template is sent
    through the Update call.

    Args:
        args: Parsed command-line namespace.

    Returns:
        The response of the workflow template Update call.
    """
    dataproc = dp.Dataproc(self.ReleaseTrack())
    template = util.ParseWorkflowTemplates(args.template, dataproc)
    workflow_template = dataproc.GetRegionsWorkflowTemplate(
        template, args.version)

    cluster_name = template.workflowTemplatesId
    compute_resources = compute_helpers.GetComputeResources(
        self.ReleaseTrack(), cluster_name)

    # One BETA check drives all three feature switches handed to
    # GetClusterConfig (accelerators, auto-delete TTL, min CPU platform).
    is_beta = self.ReleaseTrack() == base.ReleaseTrack.BETA
    cluster_config = clusters.GetClusterConfig(
        args, dataproc, template.projectsId, compute_resources,
        is_beta,   # accelerators
        is_beta,   # auto-delete TTL
        is_beta)   # min CPU platform

    messages = dataproc.messages
    workflow_template.placement = messages.WorkflowTemplatePlacement(
        managedCluster=messages.ManagedCluster(
            clusterName=cluster_name,
            config=cluster_config,
            labels=labels_util.ParseCreateArgs(
                args, messages.ManagedCluster.LabelsValue)))

    return dataproc.client.projects_regions_workflowTemplates.Update(
        workflow_template)
def Run(self, args):
    """Create a cluster from the parsed `cluster` concept and flags.

    Args:
        args: Parsed command-line namespace.

    Returns:
        Whatever clusters.CreateCluster returns for the request.
    """
    self.ValidateArgs(args)

    dataproc = dp.Dataproc(self.ReleaseTrack())
    ref = args.CONCEPTS.cluster.Parse()

    resources = compute_helpers.GetComputeResources(
        self.ReleaseTrack(), ref.clusterName, ref.region)
    config = clusters.GetClusterConfig(
        args,
        dataproc,
        ref.projectId,
        resources,
        self.BETA,
        include_ttl_config=True)

    new_cluster = dataproc.messages.Cluster(
        config=config,
        clusterName=ref.clusterName,
        projectId=ref.projectId)
    # Let the command class apply its own adjustments to the message.
    self.ConfigureCluster(dataproc.messages, args, new_cluster)

    return clusters.CreateCluster(
        dataproc, ref, new_cluster, args.async_, args.timeout)
def Run(self, args):
    """Create a cluster from the parsed `cluster` concept and flags.

    When `self.BETA` is false, the `action_on_failed_primary_workers`
    choice is converted to its request enum and forwarded; when it is true,
    None is forwarded instead and the GKE-related switches are enabled.

    Args:
        args: Parsed command-line namespace.

    Returns:
        Whatever clusters.CreateCluster returns for the request.
    """
    self.ValidateArgs(args)

    dataproc = dp.Dataproc(self.ReleaseTrack())
    ref = args.CONCEPTS.cluster.Parse()

    resources = compute_helpers.GetComputeResources(
        self.ReleaseTrack(), ref.clusterName, ref.region)
    config = clusters.GetClusterConfig(
        args,
        dataproc,
        ref.projectId,
        resources,
        self.BETA,
        include_ttl_config=True,
        include_gke_platform_args=self.BETA)

    if self.BETA:
        failure_action = None
    else:
        failure_action = arg_utils.ChoiceToEnum(
            args.action_on_failed_primary_workers,
            dataproc.messages.DataprocProjectsRegionsClustersCreateRequest
            .ActionOnFailedPrimaryWorkersValueValuesEnum)

    new_cluster = dataproc.messages.Cluster(
        config=config,
        clusterName=ref.clusterName,
        projectId=ref.projectId)
    # Let the command class apply its own adjustments to the message.
    self.ConfigureCluster(dataproc.messages, args, new_cluster)

    return clusters.CreateCluster(
        dataproc,
        ref,
        new_cluster,
        args.async_,
        args.timeout,
        enable_create_on_gke=self.BETA,
        action_on_failed_primary_workers=failure_action)
def Run(self, args):
    """Create a cluster named `args.name` from the command-line flags.

    Args:
        args: Parsed command-line namespace. Must carry `name`, `timeout`
            and an attribute literally called `async`.

    Returns:
        Whatever clusters.CreateCluster returns for the request.
    """
    self.ValidateArgs(args)

    dataproc = dp.Dataproc(self.ReleaseTrack())
    cluster_ref = util.ParseCluster(args.name, dataproc)

    compute_resources = compute_helpers.GetComputeResources(
        self.ReleaseTrack(), args.name)
    beta = self.ReleaseTrack() == base.ReleaseTrack.BETA
    cluster_config = clusters.GetClusterConfig(
        args, dataproc, cluster_ref.projectId, compute_resources, beta)

    cluster = dataproc.messages.Cluster(
        config=cluster_config,
        clusterName=cluster_ref.clusterName,
        projectId=cluster_ref.projectId)
    self.ConfigureCluster(dataproc.messages, args, cluster)

    # `async` is a reserved keyword since Python 3.7, so `args.async` no
    # longer parses; getattr reads the very same attribute.
    is_async = getattr(args, 'async')
    return clusters.CreateCluster(dataproc, cluster, is_async, args.timeout)
def Run(self, args):
    """Create a Dataproc cluster from the command-line flags.

    Builds the full ClusterConfig inline (GCE settings, main/worker instance
    groups, optional secondary workers, init actions, software properties),
    issues the Create request and, unless the `async` flag is set, waits for
    the operation and re-fetches the cluster.

    Args:
        args: Parsed command-line namespace.

    Returns:
        The Cluster message fetched after the operation completes, or None
        when the `async` flag is set.
    """
    self.ValidateArgs(args)

    client = self.context['dataproc_client']
    messages = self.context['dataproc_messages']

    cluster_ref = util.ParseCluster(args.name, self.context)

    compute_resources = compute_helpers.GetComputeResources(
        self.ReleaseTrack(), args.name)

    # Accelerator flags are only read on the BETA track.
    main_accelerator_type = None
    worker_accelerator_type = None
    main_accelerator_count = None
    worker_accelerator_count = None
    if self.ReleaseTrack() == base.ReleaseTrack.BETA:
        if args.main_accelerator:
            main_accelerator_type = args.main_accelerator['type']
            main_accelerator_count = args.main_accelerator.get('count', 1)
        if args.worker_accelerator:
            worker_accelerator_type = args.worker_accelerator['type']
            worker_accelerator_count = args.worker_accelerator.get(
                'count', 1)

    # Resolve GCE resources
    zone_ref = compute_resources.Parse(None, collection='compute.zones')
    image_ref = args.image and compute_resources.Parse(
        args.image, collection='compute.images')
    main_machine_type_ref = (
        args.main_machine_type and compute_resources.Parse(
            args.main_machine_type, collection='compute.machineTypes'))
    worker_machine_type_ref = (
        args.worker_machine_type and compute_resources.Parse(
            args.worker_machine_type, collection='compute.machineTypes'))
    network_ref = args.network and compute_resources.Parse(
        args.network, collection='compute.networks')
    subnetwork_ref = args.subnet and compute_resources.Parse(
        args.subnet, collection='compute.subnetworks')
    main_accelerator_type_ref = (
        main_accelerator_type and compute_resources.Parse(
            main_accelerator_type, collection='compute.acceleratorTypes'))
    worker_accelerator_type_ref = (
        worker_accelerator_type and compute_resources.Parse(
            worker_accelerator_type, collection='compute.acceleratorTypes'))

    # Every init action shares the same execution timeout.
    timeout_str = str(args.initialization_action_timeout) + 's'
    init_actions = [
        messages.NodeInitializationAction(
            executableFile=exe, executionTimeout=timeout_str)
        for exe in (args.initialization_actions or [])
    ]

    expanded_scopes = compute_helpers.ExpandScopeAliases(args.scopes)

    software_config = messages.SoftwareConfig(
        imageVersion=args.image_version)

    # A truthy *_boot_disk_size flag overrides the *_boot_disk_size_gb one.
    main_boot_disk_size_gb = args.main_boot_disk_size_gb
    if args.main_boot_disk_size:
        main_boot_disk_size_gb = api_utils.BytesToGb(
            args.main_boot_disk_size)

    worker_boot_disk_size_gb = args.worker_boot_disk_size_gb
    if args.worker_boot_disk_size:
        worker_boot_disk_size_gb = api_utils.BytesToGb(
            args.worker_boot_disk_size)

    preemptible_worker_boot_disk_size_gb = api_utils.BytesToGb(
        args.preemptible_worker_boot_disk_size)

    if args.single_node:
        args.properties[constants.ALLOW_ZERO_WORKERS_PROPERTY] = 'true'

    if args.properties:
        software_config.properties = encoding.DictToMessage(
            args.properties, messages.SoftwareConfig.PropertiesValue)

    gce_cluster_config = messages.GceClusterConfig(
        networkUri=network_ref and network_ref.SelfLink(),
        subnetworkUri=subnetwork_ref and subnetwork_ref.SelfLink(),
        serviceAccount=args.service_account,
        serviceAccountScopes=expanded_scopes,
        zoneUri=zone_ref and zone_ref.SelfLink())

    if args.tags:
        gce_cluster_config.tags = args.tags

    if args.metadata:
        # args.metadata is a sequence of dicts; later entries win on
        # duplicate keys.
        flat_metadata = {
            k: v for d in args.metadata for k, v in d.items()}
        gce_cluster_config.metadata = encoding.DictToMessage(
            flat_metadata, messages.GceClusterConfig.MetadataValue)

    main_accelerators = []
    if main_accelerator_type:
        main_accelerators.append(
            messages.AcceleratorConfig(
                acceleratorTypeUri=main_accelerator_type_ref and
                main_accelerator_type_ref.SelfLink(),
                acceleratorCount=main_accelerator_count))
    worker_accelerators = []
    if worker_accelerator_type:
        worker_accelerators.append(
            messages.AcceleratorConfig(
                acceleratorTypeUri=worker_accelerator_type_ref and
                worker_accelerator_type_ref.SelfLink(),
                acceleratorCount=worker_accelerator_count))

    cluster_config = messages.ClusterConfig(
        configBucket=args.bucket,
        gceClusterConfig=gce_cluster_config,
        mainConfig=messages.InstanceGroupConfig(
            numInstances=args.num_mains,
            imageUri=image_ref and image_ref.SelfLink(),
            machineTypeUri=main_machine_type_ref and
            main_machine_type_ref.SelfLink(),
            accelerators=main_accelerators,
            diskConfig=messages.DiskConfig(
                bootDiskSizeGb=main_boot_disk_size_gb,
                numLocalSsds=args.num_main_local_ssds,
            ),
        ),
        workerConfig=messages.InstanceGroupConfig(
            numInstances=args.num_workers,
            imageUri=image_ref and image_ref.SelfLink(),
            machineTypeUri=worker_machine_type_ref and
            worker_machine_type_ref.SelfLink(),
            accelerators=worker_accelerators,
            diskConfig=messages.DiskConfig(
                bootDiskSizeGb=worker_boot_disk_size_gb,
                numLocalSsds=args.num_worker_local_ssds,
            ),
        ),
        initializationActions=init_actions,
        softwareConfig=software_config,
    )

    # Secondary worker group is optional. However, users may specify
    # future pVM disk size at creation time.
    if (args.num_preemptible_workers is not None or
            preemptible_worker_boot_disk_size_gb is not None):
        cluster_config.secondaryWorkerConfig = (
            messages.InstanceGroupConfig(
                numInstances=args.num_preemptible_workers,
                diskConfig=messages.DiskConfig(
                    bootDiskSizeGb=preemptible_worker_boot_disk_size_gb,
                )))

    cluster = messages.Cluster(
        config=cluster_config,
        clusterName=cluster_ref.clusterName,
        projectId=cluster_ref.projectId)

    self.ConfigureCluster(messages, args, cluster)

    operation = client.projects_regions_clusters.Create(
        messages.DataprocProjectsRegionsClustersCreateRequest(
            projectId=cluster_ref.projectId,
            region=cluster_ref.region,
            cluster=cluster))

    # `async` is a reserved keyword since Python 3.7, so `args.async` no
    # longer parses; getattr reads the very same attribute.
    if getattr(args, 'async'):
        log.status.write('Creating [{0}] with operation [{1}].'.format(
            cluster_ref, operation.name))
        return

    operation = util.WaitForOperation(
        operation, self.context, 'Waiting for cluster creation operation')

    get_request = messages.DataprocProjectsRegionsClustersGetRequest(
        projectId=cluster_ref.projectId,
        region=cluster_ref.region,
        clusterName=cluster_ref.clusterName)
    cluster = client.projects_regions_clusters.Get(get_request)
    if cluster.status.state == (
            messages.ClusterStatus.StateValueValuesEnum.RUNNING):
        log.CreatedResource(cluster_ref)
    else:
        log.error('Create cluster failed!')
        if operation.details:
            log.error('Details:\n' + operation.details)
    return cluster
def Run(self, args):
    """Create a cluster named `args.name` and report where it landed.

    Sends the Create request; unless the `async` flag is set, waits for the
    operation (bounded by `args.timeout`), re-fetches the cluster and logs
    either the created resource with its zone or the failure details.

    Args:
        args: Parsed command-line namespace.

    Returns:
        The Cluster message fetched after the operation completes, or None
        when the `async` flag is set.
    """
    self.ValidateArgs(args)

    dataproc = dp.Dataproc(self.ReleaseTrack())
    cluster_ref = util.ParseCluster(args.name, dataproc)

    compute_resources = compute_helpers.GetComputeResources(
        self.ReleaseTrack(), args.name)
    beta = self.ReleaseTrack() == base.ReleaseTrack.BETA
    cluster_config = clusters.GetClusterConfig(
        args, dataproc, cluster_ref.projectId, compute_resources, beta)

    cluster = dataproc.messages.Cluster(
        config=cluster_config,
        clusterName=cluster_ref.clusterName,
        projectId=cluster_ref.projectId)
    self.ConfigureCluster(dataproc.messages, args, cluster)

    operation = dataproc.client.projects_regions_clusters.Create(
        dataproc.messages.DataprocProjectsRegionsClustersCreateRequest(
            projectId=cluster_ref.projectId,
            region=cluster_ref.region,
            cluster=cluster))

    # `async` is a reserved keyword since Python 3.7, so `args.async` no
    # longer parses; getattr reads the very same attribute.
    if getattr(args, 'async'):
        log.status.write('Creating [{0}] with operation [{1}].'.format(
            cluster_ref, operation.name))
        return

    operation = util.WaitForOperation(
        dataproc,
        operation,
        message='Waiting for cluster creation operation',
        timeout_s=args.timeout)

    get_request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
        projectId=cluster_ref.projectId,
        region=cluster_ref.region,
        clusterName=cluster_ref.clusterName)
    cluster = dataproc.client.projects_regions_clusters.Get(get_request)
    if cluster.status.state == (
            dataproc.messages.ClusterStatus.StateValueValuesEnum.RUNNING):
        zone_uri = cluster.config.gceClusterConfig.zoneUri
        zone_short_name = zone_uri.split('/')[-1]

        # Log the URL of the cluster
        log.CreatedResource(
            cluster_ref,
            # Also indicate which zone the cluster was placed in. This is
            # helpful if the server picked a zone (auto zone)
            details='Cluster placed in zone [{0}]'.format(zone_short_name))
    else:
        log.error('Create cluster failed!')
        if operation.details:
            log.error('Details:\n' + operation.details)
    return cluster
def Run(self, args):
    """Create a Dataproc cluster from the command-line flags.

    Builds the full ClusterConfig inline, issues the Create request and,
    unless the `async` flag is set, waits for the operation, re-fetches the
    cluster and logs the outcome.

    Note that `args.timeout` is increased in place by one
    `initialization_action_timeout` per initialization action.

    Args:
        args: Parsed command-line namespace.

    Returns:
        The Cluster message fetched after the operation completes, or None
        when the `async` flag is set.
    """
    self.ValidateArgs(args)

    dataproc = dp.Dataproc()

    cluster_ref = dataproc.ParseCluster(args.name)

    compute_resources = compute_helpers.GetComputeResources(
        self.ReleaseTrack(), args.name)

    # Accelerator flags are only read on the BETA track.
    master_accelerator_type = None
    worker_accelerator_type = None
    master_accelerator_count = None
    worker_accelerator_count = None
    if self.ReleaseTrack() == base.ReleaseTrack.BETA:
        if args.master_accelerator:
            master_accelerator_type = args.master_accelerator['type']
            master_accelerator_count = args.master_accelerator.get(
                'count', 1)
        if args.worker_accelerator:
            worker_accelerator_type = args.worker_accelerator['type']
            worker_accelerator_count = args.worker_accelerator.get(
                'count', 1)

    # Resolve non-zonal GCE resources
    # We will let the server resolve short names of zonal resources because
    # if auto zone is requested, we will not know the zone before sending
    # the request
    image_ref = args.image and compute_resources.Parse(
        args.image,
        params={'project': cluster_ref.projectId},
        collection='compute.images')
    network_ref = args.network and compute_resources.Parse(
        args.network,
        params={'project': cluster_ref.projectId},
        collection='compute.networks')
    subnetwork_ref = args.subnet and compute_resources.Parse(
        args.subnet,
        params={
            'project': cluster_ref.projectId,
            # NOTE(review): passed uncalled, unlike the zone lookup below;
            # presumably Parse accepts a callable and resolves it lazily --
            # confirm against the resource parser.
            'region': properties.VALUES.compute.region.GetOrFail,
        },
        collection='compute.subnetworks')

    timeout_str = str(args.initialization_action_timeout) + 's'
    init_actions = [
        dataproc.messages.NodeInitializationAction(
            executableFile=exe, executionTimeout=timeout_str)
        for exe in (args.initialization_actions or [])]
    # Increase the client timeout for each initialization action.
    args.timeout += args.initialization_action_timeout * len(init_actions)

    expanded_scopes = compute_helpers.ExpandScopeAliases(args.scopes)

    software_config = dataproc.messages.SoftwareConfig(
        imageVersion=args.image_version)

    # A truthy *_boot_disk_size flag overrides the *_boot_disk_size_gb one.
    master_boot_disk_size_gb = args.master_boot_disk_size_gb
    if args.master_boot_disk_size:
        master_boot_disk_size_gb = api_utils.BytesToGb(
            args.master_boot_disk_size)

    worker_boot_disk_size_gb = args.worker_boot_disk_size_gb
    if args.worker_boot_disk_size:
        worker_boot_disk_size_gb = api_utils.BytesToGb(
            args.worker_boot_disk_size)

    preemptible_worker_boot_disk_size_gb = api_utils.BytesToGb(
        args.preemptible_worker_boot_disk_size)

    if args.single_node:
        args.properties[constants.ALLOW_ZERO_WORKERS_PROPERTY] = 'true'

    if args.properties:
        software_config.properties = encoding.DictToMessage(
            args.properties,
            dataproc.messages.SoftwareConfig.PropertiesValue)

    gce_cluster_config = dataproc.messages.GceClusterConfig(
        networkUri=network_ref and network_ref.SelfLink(),
        subnetworkUri=subnetwork_ref and subnetwork_ref.SelfLink(),
        internalIpOnly=args.no_address,
        serviceAccount=args.service_account,
        serviceAccountScopes=expanded_scopes,
        zoneUri=properties.VALUES.compute.zone.GetOrFail())

    if args.tags:
        gce_cluster_config.tags = args.tags

    if args.metadata:
        # args.metadata is a sequence of dicts; later entries win on
        # duplicate keys.
        flat_metadata = {
            k: v for d in args.metadata for k, v in d.items()}
        gce_cluster_config.metadata = encoding.DictToMessage(
            flat_metadata,
            dataproc.messages.GceClusterConfig.MetadataValue)

    master_accelerators = []
    if master_accelerator_type:
        master_accelerators.append(
            dataproc.messages.AcceleratorConfig(
                acceleratorTypeUri=master_accelerator_type,
                acceleratorCount=master_accelerator_count))
    worker_accelerators = []
    if worker_accelerator_type:
        worker_accelerators.append(
            dataproc.messages.AcceleratorConfig(
                acceleratorTypeUri=worker_accelerator_type,
                acceleratorCount=worker_accelerator_count))

    cluster_config = dataproc.messages.ClusterConfig(
        configBucket=args.bucket,
        gceClusterConfig=gce_cluster_config,
        masterConfig=dataproc.messages.InstanceGroupConfig(
            numInstances=args.num_masters,
            imageUri=image_ref and image_ref.SelfLink(),
            machineTypeUri=args.master_machine_type,
            accelerators=master_accelerators,
            diskConfig=dataproc.messages.DiskConfig(
                bootDiskSizeGb=master_boot_disk_size_gb,
                numLocalSsds=args.num_master_local_ssds,),),
        workerConfig=dataproc.messages.InstanceGroupConfig(
            numInstances=args.num_workers,
            imageUri=image_ref and image_ref.SelfLink(),
            machineTypeUri=args.worker_machine_type,
            accelerators=worker_accelerators,
            diskConfig=dataproc.messages.DiskConfig(
                bootDiskSizeGb=worker_boot_disk_size_gb,
                numLocalSsds=args.num_worker_local_ssds,),),
        initializationActions=init_actions,
        softwareConfig=software_config,)

    # Secondary worker group is optional. However, users may specify
    # future pVM disk size at creation time.
    if (args.num_preemptible_workers is not None or
            preemptible_worker_boot_disk_size_gb is not None):
        cluster_config.secondaryWorkerConfig = (
            dataproc.messages.InstanceGroupConfig(
                numInstances=args.num_preemptible_workers,
                diskConfig=dataproc.messages.DiskConfig(
                    bootDiskSizeGb=preemptible_worker_boot_disk_size_gb,
                )))

    cluster = dataproc.messages.Cluster(
        config=cluster_config,
        clusterName=cluster_ref.clusterName,
        projectId=cluster_ref.projectId)

    self.ConfigureCluster(dataproc.messages, args, cluster)

    operation = dataproc.client.projects_regions_clusters.Create(
        dataproc.messages.DataprocProjectsRegionsClustersCreateRequest(
            projectId=cluster_ref.projectId,
            region=cluster_ref.region,
            cluster=cluster))

    # `async` is a reserved keyword since Python 3.7, so `args.async` no
    # longer parses; getattr reads the very same attribute.
    if getattr(args, 'async'):
        log.status.write(
            'Creating [{0}] with operation [{1}].'.format(
                cluster_ref, operation.name))
        return

    operation = dataproc.WaitForOperation(
        operation,
        message='Waiting for cluster creation operation',
        timeout_s=args.timeout)

    get_request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
        projectId=cluster_ref.projectId,
        region=cluster_ref.region,
        clusterName=cluster_ref.clusterName)
    cluster = dataproc.client.projects_regions_clusters.Get(get_request)
    if cluster.status.state == (
            dataproc.messages.ClusterStatus.StateValueValuesEnum.RUNNING):
        zone_uri = cluster.config.gceClusterConfig.zoneUri
        zone_short_name = zone_uri.split('/')[-1]

        # Log the URL of the cluster
        log.CreatedResource(
            cluster_ref,
            # Also indicate which zone the cluster was placed in. This is
            # helpful if the server picked a zone (auto zone)
            details='Cluster placed in zone [{0}]'.format(zone_short_name))
    else:
        log.error('Create cluster failed!')
        if operation.details:
            log.error('Details:\n' + operation.details)
    return cluster
def GetCluster(self, cluster_name, cluster_region):
    """Look up compute resources for a cluster on the GA release track.

    Args:
        cluster_name: Forwarded to compute_helpers.GetComputeResources.
        cluster_region: Forwarded to compute_helpers.GetComputeResources.

    Returns:
        The value returned by compute_helpers.GetComputeResources.
    """
    release_track = base.ReleaseTrack.GA
    return compute_helpers.GetComputeResources(
        release_track, cluster_name, cluster_region)
def GetCluster(self, cluster_name):
    """Look up compute resources for a cluster on the BETA release track.

    Args:
        cluster_name: Forwarded to compute_helpers.GetComputeResources.

    Returns:
        The value returned by compute_helpers.GetComputeResources.
    """
    release_track = base.ReleaseTrack.BETA
    return compute_helpers.GetComputeResources(release_track, cluster_name)