Example #1
    def Run(self, args):
        dataproc = dp.Dataproc(self.ReleaseTrack())

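        # Parse the workflow template resource from the declared concept args.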
        template_ref = args.CONCEPTS.template.Parse()

        workflow_template = dataproc.GetRegionsWorkflowTemplate(
            template_ref, args.version)

        if args.cluster_name:
            cluster_name = args.cluster_name
        else:
            cluster_name = template_ref.workflowTemplatesId

        compute_resources = compute_helpers.GetComputeResources(
            self.ReleaseTrack(), cluster_name)

        beta = self.ReleaseTrack() == base.ReleaseTrack.BETA
        cluster_config = clusters.GetClusterConfig(args, dataproc,
                                                   template_ref.projectsId,
                                                   compute_resources, beta)

        labels = labels_util.ParseCreateArgs(
            args, dataproc.messages.ManagedCluster.LabelsValue)

        managed_cluster = dataproc.messages.ManagedCluster(
            clusterName=cluster_name, config=cluster_config, labels=labels)

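        # Attach the managed cluster to the template's placement; workflows
        # instantiated from this template create the cluster on demand.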
        workflow_template.placement = dataproc.messages.WorkflowTemplatePlacement(
            managedCluster=managed_cluster)

        response = dataproc.client.projects_regions_workflowTemplates.Update(
            workflow_template)
        return response
Example #2
    def Run(self, args):
        dataproc = dp.Dataproc(self.ReleaseTrack())

        template = util.ParseWorkflowTemplates(args.template, dataproc)

        workflow_template = dataproc.GetRegionsWorkflowTemplate(
            template, args.version)

        cluster_name = template.workflowTemplatesId

        compute_resources = compute_helpers.GetComputeResources(
            self.ReleaseTrack(), cluster_name)
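
        # These three capabilities were gated to the beta track when this
        # snippet was written.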
        use_accelerators = self.ReleaseTrack() == base.ReleaseTrack.BETA
        use_auto_delete_ttl = self.ReleaseTrack() == base.ReleaseTrack.BETA
        use_min_cpu_platform = self.ReleaseTrack() == base.ReleaseTrack.BETA

        cluster_config = clusters.GetClusterConfig(
            args, dataproc, template.projectsId, compute_resources,
            use_accelerators, use_auto_delete_ttl, use_min_cpu_platform)

        labels = labels_util.ParseCreateArgs(
            args, dataproc.messages.ManagedCluster.LabelsValue)

        managed_cluster = dataproc.messages.ManagedCluster(
            clusterName=cluster_name, config=cluster_config, labels=labels)

        workflow_template.placement = dataproc.messages.WorkflowTemplatePlacement(
            managedCluster=managed_cluster)

        response = dataproc.client.projects_regions_workflowTemplates.Update(
            workflow_template)
        return response
Example #3
    def Run(self, args):
        self.ValidateArgs(args)

        dataproc = dp.Dataproc(self.ReleaseTrack())

        cluster_ref = args.CONCEPTS.cluster.Parse()

        compute_resources = compute_helpers.GetComputeResources(
            self.ReleaseTrack(), cluster_ref.clusterName, cluster_ref.region)

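        # include_ttl_config presumably opts this command into the cluster
        # TTL (scheduled-deletion) flags.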
        cluster_config = clusters.GetClusterConfig(args,
                                                   dataproc,
                                                   cluster_ref.projectId,
                                                   compute_resources,
                                                   self.BETA,
                                                   include_ttl_config=True)

        cluster = dataproc.messages.Cluster(
            config=cluster_config,
            clusterName=cluster_ref.clusterName,
            projectId=cluster_ref.projectId)

        self.ConfigureCluster(dataproc.messages, args, cluster)

        return clusters.CreateCluster(dataproc, cluster_ref, cluster,
                                      args.async_, args.timeout)
Example #4
  def Run(self, args):
    self.ValidateArgs(args)

    dataproc = dp.Dataproc(self.ReleaseTrack())

    cluster_ref = args.CONCEPTS.cluster.Parse()

    compute_resources = compute_helpers.GetComputeResources(
        self.ReleaseTrack(), cluster_ref.clusterName, cluster_ref.region)

    cluster_config = clusters.GetClusterConfig(
        args,
        dataproc,
        cluster_ref.projectId,
        compute_resources,
        self.BETA,
        include_ttl_config=True,
        include_gke_platform_args=self.BETA)

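    # Off the beta track, map the flag choice onto the corresponding API
    # enum value.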
    action_on_failed_primary_workers = None
    if not self.BETA:
      action_on_failed_primary_workers = arg_utils.ChoiceToEnum(
          args.action_on_failed_primary_workers,
          dataproc.messages.DataprocProjectsRegionsClustersCreateRequest
          .ActionOnFailedPrimaryWorkersValueValuesEnum)

    cluster = dataproc.messages.Cluster(
        config=cluster_config,
        clusterName=cluster_ref.clusterName,
        projectId=cluster_ref.projectId)

    self.ConfigureCluster(dataproc.messages, args, cluster)

    return clusters.CreateCluster(
        dataproc,
        cluster_ref,
        cluster,
        args.async_,
        args.timeout,
        enable_create_on_gke=self.BETA,
        action_on_failed_primary_workers=action_on_failed_primary_workers)
Example #5
  def Run(self, args):
    self.ValidateArgs(args)

    dataproc = dp.Dataproc(self.ReleaseTrack())

    cluster_ref = util.ParseCluster(args.name, dataproc)

    compute_resources = compute_helpers.GetComputeResources(
        self.ReleaseTrack(), args.name)

    beta = self.ReleaseTrack() == base.ReleaseTrack.BETA
    cluster_config = clusters.GetClusterConfig(
        args, dataproc, cluster_ref.projectId, compute_resources, beta)

    cluster = dataproc.messages.Cluster(
        config=cluster_config,
        clusterName=cluster_ref.clusterName,
        projectId=cluster_ref.projectId)

    self.ConfigureCluster(dataproc.messages, args, cluster)

    return clusters.CreateCluster(dataproc, cluster, args.async_, args.timeout)
Example #6
    def Run(self, args):
        self.ValidateArgs(args)

        client = self.context['dataproc_client']
        messages = self.context['dataproc_messages']

        cluster_ref = util.ParseCluster(args.name, self.context)

        compute_resources = compute_helpers.GetComputeResources(
            self.ReleaseTrack(), args.name)

        master_accelerator_type = None
        worker_accelerator_type = None
        master_accelerator_count = None
        worker_accelerator_count = None
        if self.ReleaseTrack() == base.ReleaseTrack.BETA:
            if args.master_accelerator:
                master_accelerator_type = args.master_accelerator['type']
                master_accelerator_count = args.master_accelerator.get(
                    'count', 1)
            if args.worker_accelerator:
                worker_accelerator_type = args.worker_accelerator['type']
                worker_accelerator_count = args.worker_accelerator.get(
                    'count', 1)

        # Resolve GCE resources
        zone_ref = compute_resources.Parse(None, collection='compute.zones')
        image_ref = args.image and compute_resources.Parse(
            args.image, collection='compute.images')
        master_machine_type_ref = (args.master_machine_type
                                   and compute_resources.Parse(
                                       args.master_machine_type,
                                       collection='compute.machineTypes'))
        worker_machine_type_ref = (args.worker_machine_type
                                   and compute_resources.Parse(
                                       args.worker_machine_type,
                                       collection='compute.machineTypes'))
        network_ref = args.network and compute_resources.Parse(
            args.network, collection='compute.networks')
        subnetwork_ref = args.subnet and compute_resources.Parse(
            args.subnet, collection='compute.subnetworks')
        master_accelerator_type_ref = (
            master_accelerator_type and compute_resources.Parse(
                master_accelerator_type, collection='compute.acceleratorTypes'))
        worker_accelerator_type_ref = (
            worker_accelerator_type
            and compute_resources.Parse(worker_accelerator_type,
                                        collection='compute.acceleratorTypes'))

        init_actions = []
        timeout_str = str(args.initialization_action_timeout) + 's'
        if args.initialization_actions:
            init_actions = [
                messages.NodeInitializationAction(executableFile=exe,
                                                  executionTimeout=timeout_str)
                for exe in args.initialization_actions
            ]
        expanded_scopes = compute_helpers.ExpandScopeAliases(args.scopes)

        software_config = messages.SoftwareConfig(
            imageVersion=args.image_version)

        master_boot_disk_size_gb = args.master_boot_disk_size_gb
        if args.master_boot_disk_size:
            master_boot_disk_size_gb = (api_utils.BytesToGb(
                args.master_boot_disk_size))

        worker_boot_disk_size_gb = args.worker_boot_disk_size_gb
        if args.worker_boot_disk_size:
            worker_boot_disk_size_gb = (api_utils.BytesToGb(
                args.worker_boot_disk_size))

        preemptible_worker_boot_disk_size_gb = (api_utils.BytesToGb(
            args.preemptible_worker_boot_disk_size))

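        # A single-node cluster is expressed as zero workers plus a cluster
        # property that allows it.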
        if args.single_node:
            args.properties[constants.ALLOW_ZERO_WORKERS_PROPERTY] = 'true'

        if args.properties:
            software_config.properties = encoding.DictToMessage(
                args.properties, messages.SoftwareConfig.PropertiesValue)

        gce_cluster_config = messages.GceClusterConfig(
            networkUri=network_ref and network_ref.SelfLink(),
            subnetworkUri=subnetwork_ref and subnetwork_ref.SelfLink(),
            serviceAccount=args.service_account,
            serviceAccountScopes=expanded_scopes,
            zoneUri=zone_ref and zone_ref.SelfLink())

        if args.tags:
            gce_cluster_config.tags = args.tags

        if args.metadata:
            flat_metadata = dict(
                (k, v) for d in args.metadata for k, v in d.items())
            gce_cluster_config.metadata = encoding.DictToMessage(
                flat_metadata, messages.GceClusterConfig.MetadataValue)

        master_accelerators = []
        if master_accelerator_type:
            master_accelerators.append(
                messages.AcceleratorConfig(
                    acceleratorTypeUri=master_accelerator_type_ref
                    and master_accelerator_type_ref.SelfLink(),
                    acceleratorCount=master_accelerator_count))
        worker_accelerators = []
        if worker_accelerator_type:
            worker_accelerators.append(
                messages.AcceleratorConfig(
                    acceleratorTypeUri=worker_accelerator_type_ref
                    and worker_accelerator_type_ref.SelfLink(),
                    acceleratorCount=worker_accelerator_count))

        cluster_config = messages.ClusterConfig(
            configBucket=args.bucket,
            gceClusterConfig=gce_cluster_config,
            masterConfig=messages.InstanceGroupConfig(
                numInstances=args.num_masters,
                imageUri=image_ref and image_ref.SelfLink(),
                machineTypeUri=master_machine_type_ref
                and master_machine_type_ref.SelfLink(),
                accelerators=master_accelerators,
                diskConfig=messages.DiskConfig(
                    bootDiskSizeGb=master_boot_disk_size_gb,
                    numLocalSsds=args.num_master_local_ssds,
                ),
                ),
            ),
            workerConfig=messages.InstanceGroupConfig(
                numInstances=args.num_workers,
                imageUri=image_ref and image_ref.SelfLink(),
                machineTypeUri=worker_machine_type_ref
                and worker_machine_type_ref.SelfLink(),
                accelerators=worker_accelerators,
                diskConfig=messages.DiskConfig(
                    bootDiskSizeGb=worker_boot_disk_size_gb,
                    numLocalSsds=args.num_worker_local_ssds,
                ),
            ),
            initializationActions=init_actions,
            softwareConfig=software_config,
        )

        # Secondary worker group is optional. However, users may specify
        # future pVM disk size at creation time.
        if (args.num_preemptible_workers is not None
                or preemptible_worker_boot_disk_size_gb is not None):
            cluster_config.secondaryWorkerConfig = (
                messages.InstanceGroupConfig(
                    numInstances=args.num_preemptible_workers,
                    diskConfig=messages.DiskConfig(
                        bootDiskSizeGb=preemptible_worker_boot_disk_size_gb, ))
            )

        cluster = messages.Cluster(config=cluster_config,
                                   clusterName=cluster_ref.clusterName,
                                   projectId=cluster_ref.projectId)

        self.ConfigureCluster(messages, args, cluster)

        operation = client.projects_regions_clusters.Create(
            messages.DataprocProjectsRegionsClustersCreateRequest(
                projectId=cluster_ref.projectId,
                region=cluster_ref.region,
                cluster=cluster))

        if args.async_:
            log.status.write('Creating [{0}] with operation [{1}].'.format(
                cluster_ref, operation.name))
            return

        operation = util.WaitForOperation(
            operation, self.context, 'Waiting for cluster creation operation')

        get_request = messages.DataprocProjectsRegionsClustersGetRequest(
            projectId=cluster_ref.projectId,
            region=cluster_ref.region,
            clusterName=cluster_ref.clusterName)
        cluster = client.projects_regions_clusters.Get(get_request)
        if cluster.status.state == (
                messages.ClusterStatus.StateValueValuesEnum.RUNNING):
            log.CreatedResource(cluster_ref)
        else:
            log.error('Create cluster failed!')
            if operation.details:
                log.error('Details:\n' + operation.details)
        return cluster
Example #7
    def Run(self, args):
        self.ValidateArgs(args)

        dataproc = dp.Dataproc(self.ReleaseTrack())

        cluster_ref = util.ParseCluster(args.name, dataproc)

        compute_resources = compute_helpers.GetComputeResources(
            self.ReleaseTrack(), args.name)

        beta = self.ReleaseTrack() == base.ReleaseTrack.BETA
        cluster_config = clusters.GetClusterConfig(args, dataproc,
                                                   cluster_ref.projectId,
                                                   compute_resources, beta)

        cluster = dataproc.messages.Cluster(
            config=cluster_config,
            clusterName=cluster_ref.clusterName,
            projectId=cluster_ref.projectId)

        self.ConfigureCluster(dataproc.messages, args, cluster)

        operation = dataproc.client.projects_regions_clusters.Create(
            dataproc.messages.DataprocProjectsRegionsClustersCreateRequest(
                projectId=cluster_ref.projectId,
                region=cluster_ref.region,
                cluster=cluster))

        if args.async_:
            log.status.write('Creating [{0}] with operation [{1}].'.format(
                cluster_ref, operation.name))
            return

        operation = util.WaitForOperation(
            dataproc,
            operation,
            message='Waiting for cluster creation operation',
            timeout_s=args.timeout)

        get_request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
            projectId=cluster_ref.projectId,
            region=cluster_ref.region,
            clusterName=cluster_ref.clusterName)
        cluster = dataproc.client.projects_regions_clusters.Get(get_request)
        if cluster.status.state == (
                dataproc.messages.ClusterStatus.StateValueValuesEnum.RUNNING):

            zone_uri = cluster.config.gceClusterConfig.zoneUri
            zone_short_name = zone_uri.split('/')[-1]

            # Log the URL of the cluster
            log.CreatedResource(
                cluster_ref,
                # Also indicate which zone the cluster was placed in. This is helpful
                # if the server picked a zone (auto zone)
                details='Cluster placed in zone [{0}]'.format(zone_short_name))
        else:
            log.error('Create cluster failed!')
            if operation.details:
                log.error('Details:\n' + operation.details)
        return cluster
Example #8
  def Run(self, args):
    self.ValidateArgs(args)

    dataproc = dp.Dataproc()

    cluster_ref = dataproc.ParseCluster(args.name)

    compute_resources = compute_helpers.GetComputeResources(
        self.ReleaseTrack(), args.name)

    master_accelerator_type = None
    worker_accelerator_type = None
    master_accelerator_count = None
    worker_accelerator_count = None
    if self.ReleaseTrack() == base.ReleaseTrack.BETA:
      if args.master_accelerator:
        master_accelerator_type = args.master_accelerator['type']
        master_accelerator_count = args.master_accelerator.get('count', 1)
      if args.worker_accelerator:
        worker_accelerator_type = args.worker_accelerator['type']
        worker_accelerator_count = args.worker_accelerator.get('count', 1)

    # Resolve non-zonal GCE resources
    # We will let the server resolve short names of zonal resources because
    # if auto zone is requested, we will not know the zone before sending the
    # request
    image_ref = args.image and compute_resources.Parse(
        args.image,
        params={'project': cluster_ref.projectId},
        collection='compute.images')
    network_ref = args.network and compute_resources.Parse(
        args.network,
        params={'project': cluster_ref.projectId},
        collection='compute.networks')
    subnetwork_ref = args.subnet and compute_resources.Parse(
        args.subnet,
        params={
            'project': cluster_ref.projectId,
            'region': properties.VALUES.compute.region.GetOrFail,
        },
        collection='compute.subnetworks')
    timeout_str = str(args.initialization_action_timeout) + 's'
    init_actions = [
        dataproc.messages.NodeInitializationAction(
            executableFile=exe, executionTimeout=timeout_str)
        for exe in (args.initialization_actions or [])]
    # Increase the client timeout for each initialization action.
    args.timeout += args.initialization_action_timeout * len(init_actions)

    expanded_scopes = compute_helpers.ExpandScopeAliases(args.scopes)

    software_config = dataproc.messages.SoftwareConfig(
        imageVersion=args.image_version)

    master_boot_disk_size_gb = args.master_boot_disk_size_gb
    if args.master_boot_disk_size:
      master_boot_disk_size_gb = (
          api_utils.BytesToGb(args.master_boot_disk_size))

    worker_boot_disk_size_gb = args.worker_boot_disk_size_gb
    if args.worker_boot_disk_size:
      worker_boot_disk_size_gb = (
          api_utils.BytesToGb(args.worker_boot_disk_size))

    preemptible_worker_boot_disk_size_gb = (
        api_utils.BytesToGb(args.preemptible_worker_boot_disk_size))

    if args.single_node:
      args.properties[constants.ALLOW_ZERO_WORKERS_PROPERTY] = 'true'

    if args.properties:
      software_config.properties = encoding.DictToMessage(
          args.properties, dataproc.messages.SoftwareConfig.PropertiesValue)

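    # --no-address surfaces here as internalIpOnly: instances are created
    # without external IP addresses.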
    gce_cluster_config = dataproc.messages.GceClusterConfig(
        networkUri=network_ref and network_ref.SelfLink(),
        subnetworkUri=subnetwork_ref and subnetwork_ref.SelfLink(),
        internalIpOnly=args.no_address,
        serviceAccount=args.service_account,
        serviceAccountScopes=expanded_scopes,
        zoneUri=properties.VALUES.compute.zone.GetOrFail())

    if args.tags:
      gce_cluster_config.tags = args.tags

    if args.metadata:
      flat_metadata = dict((k, v) for d in args.metadata for k, v in d.items())
      gce_cluster_config.metadata = encoding.DictToMessage(
          flat_metadata, dataproc.messages.GceClusterConfig.MetadataValue)

    master_accelerators = []
    if master_accelerator_type:
      master_accelerators.append(
          dataproc.messages.AcceleratorConfig(
              acceleratorTypeUri=master_accelerator_type,
              acceleratorCount=master_accelerator_count))
    worker_accelerators = []
    if worker_accelerator_type:
      worker_accelerators.append(
          dataproc.messages.AcceleratorConfig(
              acceleratorTypeUri=worker_accelerator_type,
              acceleratorCount=worker_accelerator_count))

    cluster_config = dataproc.messages.ClusterConfig(
        configBucket=args.bucket,
        gceClusterConfig=gce_cluster_config,
        masterConfig=dataproc.messages.InstanceGroupConfig(
            numInstances=args.num_masters,
            imageUri=image_ref and image_ref.SelfLink(),
            machineTypeUri=args.master_machine_type,
            accelerators=master_accelerators,
            diskConfig=dataproc.messages.DiskConfig(
                bootDiskSizeGb=master_boot_disk_size_gb,
                numLocalSsds=args.num_master_local_ssds,),),
        workerConfig=dataproc.messages.InstanceGroupConfig(
            numInstances=args.num_workers,
            imageUri=image_ref and image_ref.SelfLink(),
            machineTypeUri=args.worker_machine_type,
            accelerators=worker_accelerators,
            diskConfig=dataproc.messages.DiskConfig(
                bootDiskSizeGb=worker_boot_disk_size_gb,
                numLocalSsds=args.num_worker_local_ssds,),),
        initializationActions=init_actions,
        softwareConfig=software_config,)

    # Secondary worker group is optional. However, users may specify
    # future pVM disk size at creation time.
    if (args.num_preemptible_workers is not None or
        preemptible_worker_boot_disk_size_gb is not None):
      cluster_config.secondaryWorkerConfig = (
          dataproc.messages.InstanceGroupConfig(
              numInstances=args.num_preemptible_workers,
              diskConfig=dataproc.messages.DiskConfig(
                  bootDiskSizeGb=preemptible_worker_boot_disk_size_gb,
              )))

    cluster = dataproc.messages.Cluster(
        config=cluster_config,
        clusterName=cluster_ref.clusterName,
        projectId=cluster_ref.projectId)

    self.ConfigureCluster(dataproc.messages, args, cluster)

    operation = dataproc.client.projects_regions_clusters.Create(
        dataproc.messages.DataprocProjectsRegionsClustersCreateRequest(
            projectId=cluster_ref.projectId,
            region=cluster_ref.region,
            cluster=cluster))

    if args.async_:
      log.status.write(
          'Creating [{0}] with operation [{1}].'.format(
              cluster_ref, operation.name))
      return

    operation = dataproc.WaitForOperation(
        operation,
        message='Waiting for cluster creation operation',
        timeout_s=args.timeout)

    get_request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
        projectId=cluster_ref.projectId,
        region=cluster_ref.region,
        clusterName=cluster_ref.clusterName)
    cluster = dataproc.client.projects_regions_clusters.Get(get_request)
    if cluster.status.state == (
        dataproc.messages.ClusterStatus.StateValueValuesEnum.RUNNING):

      zone_uri = cluster.config.gceClusterConfig.zoneUri
      zone_short_name = zone_uri.split('/')[-1]

      # Log the URL of the cluster
      log.CreatedResource(
          cluster_ref,
          # Also indicate which zone the cluster was placed in. This is helpful
          # if the server picked a zone (auto zone)
          details='Cluster placed in zone [{0}]'.format(zone_short_name))
    else:
      log.error('Create cluster failed!')
      if operation.details:
        log.error('Details:\n' + operation.details)
    return cluster
Example #9
  def GetCluster(self, cluster_name, cluster_region):
    return compute_helpers.GetComputeResources(
        base.ReleaseTrack.GA, cluster_name, cluster_region)
Example #10
  def GetCluster(self, cluster_name):
    return compute_helpers.GetComputeResources(
        base.ReleaseTrack.BETA, cluster_name)
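
The two GetCluster helpers above only build the resource registry for a fixed
release track. As a rough usage sketch (assuming the googlecloudsdk modules
imported by these snippets are importable; the cluster and machine-type names
below are made up), the returned registry resolves compute resources the same
way the examples above do:

    resources = compute_helpers.GetComputeResources(
        base.ReleaseTrack.GA, 'my-cluster')
    # Resolve the default zone, as in Example #6.
    zone_ref = resources.Parse(None, collection='compute.zones')
    # Resolve a machine-type short name into a full resource reference.
    machine_type_ref = resources.Parse(
        'n1-standard-4', collection='compute.machineTypes')
    machine_type_uri = machine_type_ref and machine_type_ref.SelfLink()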