Example #1
def AddArguments(parser):
    """Add arguments related to SessionsCreateRequest message.

  Add SessionsCreateRequest arguments to parser. This only includes general
  arguments for all `sessions create` commands. Session type specific
  arguments are not included, and those messages need to be passed in during
  message construction (when calling GetMessage).

  Args:
    parser: An argument parser instance.
  """
    request_id_pattern = re.compile(r'^[a-zA-Z0-9_-]{1,40}$')
    parser.add_argument(
        '--request-id',
        type=arg_parsers.CustomFunctionValidator(request_id_pattern.match, (
            'Only letters (a-z, A-Z), numbers (0-9), underscores (_), and hyphens '
            '(-) are allowed. The length must not exceed 40 characters.')),
        help=('A unique ID that identifies the request. If the service '
              'receives two session create requests with the same request_id, '
              'the second request is ignored and the operation that '
              'corresponds to the first session created and stored in the '
              'backend is returned. '
              'Recommendation:  Always set this value to a UUID. '
              'The value must contain only letters (a-z, A-Z), numbers (0-9), '
              'underscores (_), and hyphens (-). The maximum length is 40 '
              'characters.'))

    _AddDependency(parser)
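
For reference, the `--request-id` pattern above can be exercised outside the SDK with only the standard `re` module. A minimal standalone sketch (illustrative only; it is not part of the command code above):

import re

# Same pattern as the --request-id validator above.
REQUEST_ID_PATTERN = re.compile(r'^[a-zA-Z0-9_-]{1,40}$')

# A UUID (36 hex digits and hyphens) satisfies the pattern.
assert REQUEST_ID_PATTERN.match('123e4567-e89b-12d3-a456-426614174000')
# Values longer than 40 characters are rejected.
assert not REQUEST_ID_PATTERN.match('x' * 41)
# Characters outside letters, digits, underscores, and hyphens are rejected.
assert not REQUEST_ID_PATTERN.match('bad id!')
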
Example #2
def AddArguments(parser, api_version):
    """Add arguments related to BatchesCreateRequest message.

  Add BatchesCreateRequest arguments to parser. This only includes general
  arguments for all `batches submit` commands. Batch job type specific
  arguments are not included, and those messages need to be passed in during
  message construction (when calling GetMessage).

  Args:
    parser: An argument parser instance.
    api_version: API version to use.
  """
    flags.AddProjectsLocationsResourceArg(parser, api_version)

    batch_id_pattern = re.compile(r'^[a-z0-9][-a-z0-9]{2,61}[a-z0-9]$')
    parser.add_argument(
        '--batch',
        type=arg_parsers.CustomFunctionValidator(batch_id_pattern.match, (
            'Only lowercase letters (a-z), numbers (0-9), and '
            'hyphens (-) are allowed. The length must be between 4 and 63 characters.'
        )),
        help=
        ('The ID of the batch job to submit. '
         'The ID must contain only lowercase letters (a-z), numbers (0-9) and '
         'hyphens (-). The length of the name must be between 4 and 63 characters. '
         'If this argument is not provided, a randomly generated UUID '
         'will be used.'))

    request_id_pattern = re.compile(r'^[a-zA-Z0-9_-]{1,40}$')
    parser.add_argument(
        '--request-id',
        type=arg_parsers.CustomFunctionValidator(request_id_pattern.match, (
            'Only letters (a-z, A-Z), numbers (0-9), underscores (_), and hyphens '
            '(-) are allowed. The length must not exceed 40 characters.')),
        help=('A unique ID that identifies the request. If the service '
              'receives two batch create requests with the same request_id, '
              'the second request is ignored and the operation that '
              'corresponds to the first Batch created and stored in the '
              'backend is returned. '
              'Recommendation:  Always set this value to a UUID. '
              'The value must contain only letters (a-z, A-Z), numbers (0-9), '
              'underscores (_), and hyphens (-). The maximum length is 40 '
              'characters.'))

    _AddDependency(parser)
Example #3
def AddFleetProject(parser):
  parser.add_argument(
      '--fleet-project',
      type=arg_parsers.CustomFunctionValidator(
          project_util.ValidateProjectIdentifier,
          '--fleet-project must be a valid project ID or project number.'),
      required=True,
      help='ID or number of the Fleet host project where the cluster is registered.'
  )
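
`project_util.ValidateProjectIdentifier` is referenced but not shown in this excerpt. A hypothetical sketch of such a predicate, based only on the error message above and the usual Cloud project naming rules (the name, regex, and exact rules here are assumptions, not the SDK implementation):

import re

def ValidateProjectIdentifier(value):
  # Hypothetical: accept a project number (all digits) or a project ID
  # (6-30 characters; lowercase letters, digits, and hyphens; starts with a
  # letter; does not end with a hyphen).
  if value.isdigit():
    return True
  return re.match(r'^[a-z][a-z0-9-]{4,28}[a-z0-9]$', value) is not None
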
Example #4
def AddPortFlag(parser):
  """Add port flag to override $PORT."""
  parser.add_argument(
      '--port',
      type=arg_parsers.CustomFunctionValidator(
          _PortValue,
          'must be an integer between 1 and 65535, inclusive, or "default".'),
      help='Container port to receive requests at. Also sets the $PORT '
      'environment variable. Must be a number between 1 and 65535, inclusive. '
      'To unset this field, pass the special value "default".')
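
`_PortValue` is not shown in this excerpt. A plausible sketch, matching the validator's error message above (an integer between 1 and 65535, inclusive, or the literal string "default"); the SDK's actual implementation may differ:

def _PortValue(value):
  """Returns True if value is 'default' or an integer in [1, 65535]."""
  # Hypothetical reconstruction based on the error message above.
  if value == 'default':
    return True
  return value.isdigit() and 1 <= int(value) <= 65535
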
Example #5
def AddConcurrencyFlag(parser):
  parser.add_argument(
      '--concurrency',
      type=arg_parsers.CustomFunctionValidator(
          _ConcurrencyValue,
          'must be an integer greater than 0 or "default".'),
      help='Set the number of concurrent requests allowed per '
      'container instance. A concurrency of 0 or unspecified indicates '
      'any number of concurrent requests are allowed. To unset '
      'this field, provide the special value `default`.')
Example #6
def _GetAppEngineRoutingKeysValidator():
  return arg_parsers.CustomFunctionValidator(
      lambda k: k in constants.APP_ENGINE_ROUTING_KEYS,
      'Only the following keys are valid for routing: [{}].'.format(
          ', '.join(constants.APP_ENGINE_ROUTING_KEYS)))
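
All of these examples lean on the same idea: CustomFunctionValidator wraps a boolean predicate so it can be used as an argparse `type`, rejecting any value the predicate turns down. A rough, self-contained approximation of that pattern using only the standard library (an illustrative sketch, not the SDK's actual implementation):

import argparse

def MakeValidatedType(predicate, error_message):
  """Returns an argparse type function that rejects values failing predicate."""
  def _Parse(value):
    if not predicate(value):
      raise argparse.ArgumentTypeError(error_message)
    return value
  return _Parse

# Usage sketch mirroring the routing-keys validator above; the key set is an
# assumption for illustration.
ROUTING_KEYS = ('service', 'version', 'instance')
parser = argparse.ArgumentParser()
parser.add_argument(
    '--routing-key',
    type=MakeValidatedType(
        lambda k: k in ROUTING_KEYS,
        'Only the following keys are valid for routing: [{}].'.format(
            ', '.join(ROUTING_KEYS))))
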
Example #7
def ArgsForClusterRef(parser,
                      beta=False,
                      include_deprecated=True,
                      include_ttl_config=False,
                      include_gke_platform_args=False):
    """Register flags for creating a dataproc cluster.

  Args:
    parser: The argparse.ArgParser to configure with dataproc cluster arguments.
    beta: whether or not this is a beta command (may affect flag visibility)
    include_deprecated: whether deprecated flags should be included
    include_ttl_config: whether to include Scheduled Delete(TTL) args
    include_gke_platform_args: whether to include GKE-based cluster args
  """
    labels_util.AddCreateLabelsFlags(parser)
    # 30m is backend timeout + 5m for safety buffer.
    flags.AddTimeoutFlag(parser, default='35m')
    flags.AddZoneFlag(parser, short_flags=include_deprecated)
    flags.AddComponentFlag(parser)

    platform_group = parser.add_argument_group(mutex=True)
    gce_platform_group = platform_group.add_argument_group(help="""\
    Compute Engine options for Dataproc clusters.
    """)

    instances_flags.AddTagsArgs(gce_platform_group)
    gce_platform_group.add_argument(
        '--metadata',
        type=arg_parsers.ArgDict(min_length=1),
        action='append',
        default=None,
        help=('Metadata to be made available to the guest operating system '
              'running on the instances'),
        metavar='KEY=VALUE')

    # Either allow creating a single node cluster (--single-node), or specifying
    # the number of workers in the multi-node cluster (--num-workers and
    # --num-secondary-workers)
    node_group = parser.add_argument_group(mutex=True)  # Mutually exclusive
    node_group.add_argument('--single-node',
                            action='store_true',
                            help="""\
      Create a single node cluster.

      A single node cluster has all master and worker components.
      It cannot have any separate worker nodes. If this flag is not
      specified, a cluster with separate workers is created.
      """)
    # Not mutually exclusive
    worker_group = node_group.add_argument_group(
        help='Multi-node cluster flags')
    worker_group.add_argument(
        '--num-workers',
        type=int,
        help='The number of worker nodes in the cluster. Defaults to '
        'server-specified.')
    worker_group.add_argument(
        '--secondary-worker-type',
        hidden=True,
        metavar='TYPE',
        choices=['preemptible', 'non-preemptible', 'unspecified'],
        default='unspecified',
        help='The type of the secondary worker group.')
    num_secondary_workers = worker_group.add_argument_group(mutex=True)
    num_secondary_workers.add_argument(
        '--num-preemptible-workers',
        action=actions.DeprecationAction(
            '--num-preemptible-workers',
            warn=('The `--num-preemptible-workers` flag is deprecated. '
                  'Use the `--num-secondary-workers` flag instead.')),
        type=int,
        hidden=True,
        help='The number of preemptible worker nodes in the cluster.')
    num_secondary_workers.add_argument(
        '--num-secondary-workers',
        type=int,
        help='The number of secondary worker nodes in the cluster.')

    parser.add_argument(
        '--master-machine-type',
        help='The type of machine to use for the master. Defaults to '
        'server-specified.')
    parser.add_argument(
        '--worker-machine-type',
        help='The type of machine to use for workers. Defaults to '
        'server-specified.')
    image_parser = parser.add_mutually_exclusive_group()
    # TODO(b/73291743): Add external doc link to --image
    image_parser.add_argument(
        '--image',
        metavar='IMAGE',
        help='The full custom image URI or the custom image name that '
        'will be used to create a cluster.')
    image_parser.add_argument(
        '--image-version',
        metavar='VERSION',
        help='The image version to use for the cluster. Defaults to the '
        'latest version.')
    parser.add_argument('--bucket',
                        help="""\
      The Google Cloud Storage bucket to use by default to stage job
      dependencies, miscellaneous config files, and job driver console output
      when using this cluster.
      """)

    netparser = gce_platform_group.add_argument_group(mutex=True)
    netparser.add_argument('--network',
                           help="""\
      The Compute Engine network that the VM instances of the cluster will be
      part of. This is mutually exclusive with --subnet. If neither is
      specified, this defaults to the "default" network.
      """)
    netparser.add_argument('--subnet',
                           help="""\
      Specifies the subnet that the cluster will be part of. This is mutually
      exclusive with --network.
      """)
    parser.add_argument(
        '--num-worker-local-ssds',
        type=int,
        help='The number of local SSDs to attach to each worker in a cluster.')
    parser.add_argument(
        '--num-master-local-ssds',
        type=int,
        help='The number of local SSDs to attach to the master in a cluster.')
    secondary_worker_local_ssds = parser.add_argument_group(mutex=True)
    secondary_worker_local_ssds.add_argument(
        '--num-preemptible-worker-local-ssds',
        type=int,
        hidden=True,
        action=actions.DeprecationAction(
            '--num-preemptible-worker-local-ssds',
            warn=(
                'The `--num-preemptible-worker-local-ssds` flag is deprecated. '
                'Use the `--num-secondary-worker-local-ssds` flag instead.')),
        help="""\
      The number of local SSDs to attach to each preemptible worker in
      a cluster.
      """)
    secondary_worker_local_ssds.add_argument(
        '--num-secondary-worker-local-ssds',
        type=int,
        help="""\
      The number of local SSDs to attach to each secondary worker in
      a cluster.
      """)
    parser.add_argument(
        '--initialization-actions',
        type=arg_parsers.ArgList(min_length=1),
        metavar='CLOUD_STORAGE_URI',
        help=('A list of Google Cloud Storage URIs of '
              'executables to run on each node in the cluster.'))
    parser.add_argument(
        '--initialization-action-timeout',
        type=arg_parsers.Duration(),
        metavar='TIMEOUT',
        default='10m',
        help=('The maximum duration of each initialization action. See '
              '$ gcloud topic datetimes for information on duration formats.'))
    parser.add_argument(
        '--num-masters',
        type=arg_parsers.CustomFunctionValidator(
            lambda n: int(n) in [1, 3],
            'Number of masters must be 1 (Standard) or 3 (High Availability)',
            parser=arg_parsers.BoundedInt(1, 3)),
        help="""\
      The number of master nodes in the cluster.

      Number of Masters | Cluster Mode
      --- | ---
      1 | Standard
      3 | High Availability
      """)
    parser.add_argument('--properties',
                        type=arg_parsers.ArgDict(),
                        action=arg_parsers.UpdateAction,
                        default={},
                        metavar='PREFIX:PROPERTY=VALUE',
                        help="""\
Specifies configuration properties for installed packages, such as Hadoop
and Spark.

Properties are mapped to configuration files by specifying a prefix, such as
"core:io.serializations". The following are supported prefixes and their
mappings:

Prefix | File | Purpose of file
--- | --- | ---
capacity-scheduler | capacity-scheduler.xml | Hadoop YARN Capacity Scheduler configuration
core | core-site.xml | Hadoop general configuration
distcp | distcp-default.xml | Hadoop Distributed Copy configuration
hadoop-env | hadoop-env.sh | Hadoop specific environment variables
hdfs | hdfs-site.xml | Hadoop HDFS configuration
hive | hive-site.xml | Hive configuration
mapred | mapred-site.xml | Hadoop MapReduce configuration
mapred-env | mapred-env.sh | Hadoop MapReduce specific environment variables
pig | pig.properties | Pig configuration
spark | spark-defaults.conf | Spark configuration
spark-env | spark-env.sh | Spark specific environment variables
yarn | yarn-site.xml | Hadoop YARN configuration
yarn-env | yarn-env.sh | Hadoop YARN specific environment variables

See https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/cluster-properties
for more information.

""")
    gce_platform_group.add_argument(
        '--service-account',
        help='The Google Cloud IAM service account to be authenticated as.')
    gce_platform_group.add_argument('--scopes',
                                    type=arg_parsers.ArgList(min_length=1),
                                    metavar='SCOPE',
                                    help="""\
Specifies scopes for the node instances. Multiple SCOPEs can be specified,
separated by commas.
Examples:

  $ {{command}} example-cluster --scopes https://www.googleapis.com/auth/bigtable.admin

  $ {{command}} example-cluster --scopes sqlservice,bigquery

The following *minimum scopes* are necessary for the cluster to function
properly and are always added, even if not explicitly specified:

  {minimum_scopes}

If the `--scopes` flag is not specified, the following *default scopes*
are also included:

  {additional_scopes}

If you want to enable all scopes, use the 'cloud-platform' scope.

{scopes_help}
""".format(minimum_scopes='\n  '.join(constants.MINIMUM_SCOPE_URIS),
           additional_scopes='\n  '.join(
               constants.ADDITIONAL_DEFAULT_SCOPE_URIS),
           scopes_help=compute_helpers.SCOPES_HELP))

    if include_deprecated:
        _AddDiskArgsDeprecated(parser)
    else:
        _AddDiskArgs(parser)

    # --no-address is an exception to the no negative-flag style guideline to be
    # consistent with gcloud compute instances create --no-address
    parser.add_argument('--no-address',
                        action='store_true',
                        help="""\
      If provided, the instances in the cluster will not be assigned external
      IP addresses.

      If omitted, the instances in the cluster will each be assigned an
      ephemeral external IP address.

      Note: Dataproc VMs need access to the Dataproc API. This can be achieved
      without external IP addresses using Private Google Access
      (https://cloud.google.com/compute/docs/private-google-access).
      """)

    boot_disk_type_detailed_help = """\
      The type of the boot disk. The value must be ``pd-standard'' or
      ``pd-ssd''.
      """
    parser.add_argument('--master-boot-disk-type',
                        help=boot_disk_type_detailed_help)
    parser.add_argument('--worker-boot-disk-type',
                        help=boot_disk_type_detailed_help)
    secondary_worker_boot_disk_type = parser.add_argument_group(mutex=True)
    secondary_worker_boot_disk_type.add_argument(
        '--preemptible-worker-boot-disk-type',
        help=boot_disk_type_detailed_help,
        hidden=True,
        action=actions.DeprecationAction(
            '--preemptible-worker-boot-disk-type',
            warn=(
                'The `--preemptible-worker-boot-disk-type` flag is deprecated. '
                'Use the `--secondary-worker-boot-disk-type` flag instead.')))
    secondary_worker_boot_disk_type.add_argument(
        '--secondary-worker-boot-disk-type', help=boot_disk_type_detailed_help)

    autoscaling_group = parser.add_argument_group()
    flags.AddAutoscalingPolicyResourceArgForCluster(
        autoscaling_group, api_version=('v1beta2' if beta else 'v1'))

    if include_ttl_config:
        parser.add_argument('--max-idle',
                            type=arg_parsers.Duration(),
                            help="""\
          The duration before the cluster is auto-deleted after the last job completes,
          such as "2h" or "1d".
          See $ gcloud topic datetimes for information on duration formats.
          """)

        auto_delete_group = parser.add_mutually_exclusive_group()
        auto_delete_group.add_argument('--max-age',
                                       type=arg_parsers.Duration(),
                                       help="""\
          The lifespan of the cluster before it is auto-deleted, such as
          "2h" or "1d".
          See $ gcloud topic datetimes for information on duration formats.
          """)

        auto_delete_group.add_argument('--expiration-time',
                                       type=arg_parsers.Datetime.Parse,
                                       help="""\
          The time when the cluster will be auto-deleted, such as
          "2017-08-29T18:52:51.142Z." See $ gcloud topic datetimes for
          information on time formats.
          """)

    AddKerberosGroup(parser)

    flags.AddMinCpuPlatformArgs(parser)

    _AddAcceleratorArgs(parser)

    AddReservationAffinityGroup(
        gce_platform_group,
        group_text='Specifies the reservation for the instance.',
        affinity_text='The type of reservation for the instance.')
    if include_gke_platform_args:
        gke_based_cluster_group = platform_group.add_argument_group(
            hidden=True,
            help="""\
          Options for creating a GKE-based Dataproc cluster. Specifying any of these
          will indicate that this cluster is intended to be a GKE-based cluster.
          These options are mutually exclusive with GCE-based options.
          """)
        gke_based_cluster_group.add_argument('--gke-cluster',
                                             hidden=True,
                                             help="""\
            Required for GKE-based clusters. Specify the name of the GKE cluster to
            deploy this GKE-based Dataproc cluster to. This should be the short name
            and not the full path name.
            """)
        gke_based_cluster_group.add_argument('--gke-cluster-namespace',
                                             hidden=True,
                                             help="""\
            Optional. Specify the name of the namespace to deploy Dataproc system
            components into. This namespace does not need to already exist.
            """)
Example #8
def ArgsForClusterRef(parser, beta=False):
    """Register flags for creating a dataproc cluster.

  Args:
    parser: The argparse.ArgParser to configure with dataproc cluster arguments.
    beta: whether or not this is a beta command (may affect flag visibility)
  """
    labels_util.AddCreateLabelsFlags(parser)
    instances_flags.AddTagsArgs(parser)
    # 30m is backend timeout + 5m for safety buffer.
    flags.AddTimeoutFlag(parser, default='35m')
    flags.AddZoneFlag(parser)

    parser.add_argument(
        '--metadata',
        type=arg_parsers.ArgDict(min_length=1),
        action='append',
        default=None,
        help=('Metadata to be made available to the guest operating system '
              'running on the instances'),
        metavar='KEY=VALUE')

    # Either allow creating a single node cluster (--single-node), or specifying
    # the number of workers in the multi-node cluster (--num-workers and
    # --num-preemptible-workers)
    node_group = parser.add_argument_group(mutex=True)  # Mutually exclusive
    node_group.add_argument('--single-node',
                            action='store_true',
                            help="""\
      Create a single node cluster.

      A single node cluster has all master and worker components.
      It cannot have any separate worker nodes. If this flag is not
      specified, a cluster with separate workers is created.
      """)
    # Not mutually exclusive
    worker_group = node_group.add_argument_group(
        help='Multi-node cluster flags')
    worker_group.add_argument(
        '--num-workers',
        type=int,
        help='The number of worker nodes in the cluster. Defaults to '
        'server-specified.')
    worker_group.add_argument(
        '--num-preemptible-workers',
        type=int,
        help='The number of preemptible worker nodes in the cluster.')

    parser.add_argument(
        '--master-machine-type',
        help='The type of machine to use for the master. Defaults to '
        'server-specified.')
    parser.add_argument(
        '--worker-machine-type',
        help='The type of machine to use for workers. Defaults to '
        'server-specified.')
    image_parser = parser.add_mutually_exclusive_group()
    # TODO(b/73291743): Add external doc link to --image
    image_parser.add_argument(
        '--image',
        metavar='IMAGE',
        help='The full custom image URI or the custom image name that '
        'will be used to create a cluster.')
    image_parser.add_argument(
        '--image-version',
        metavar='VERSION',
        help='The image version to use for the cluster. Defaults to the '
        'latest version.')
    parser.add_argument(
        '--bucket',
        help='The Google Cloud Storage bucket to use with the Google Cloud '
        'Storage connector. A bucket is auto created when this parameter is '
        'not specified.')

    netparser = parser.add_mutually_exclusive_group()
    netparser.add_argument('--network',
                           help="""\
      The Compute Engine network that the VM instances of the cluster will be
      part of. This is mutually exclusive with --subnet. If neither is
      specified, this defaults to the "default" network.
      """)
    netparser.add_argument('--subnet',
                           help="""\
      Specifies the subnet that the cluster will be part of. This is mutually
      exclusive with --network.
      """)
    parser.add_argument(
        '--num-worker-local-ssds',
        type=int,
        help='The number of local SSDs to attach to each worker in a cluster.')
    parser.add_argument(
        '--num-master-local-ssds',
        type=int,
        help='The number of local SSDs to attach to the master in a cluster.')
    parser.add_argument(
        '--initialization-actions',
        type=arg_parsers.ArgList(min_length=1),
        metavar='CLOUD_STORAGE_URI',
        help=('A list of Google Cloud Storage URIs of '
              'executables to run on each node in the cluster.'))
    parser.add_argument(
        '--initialization-action-timeout',
        type=arg_parsers.Duration(),
        metavar='TIMEOUT',
        default='10m',
        help=('The maximum duration of each initialization action. See '
              '$ gcloud topic datetimes for information on duration formats.'))
    parser.add_argument(
        '--num-masters',
        type=arg_parsers.CustomFunctionValidator(
            lambda n: int(n) in [1, 3],
            'Number of masters must be 1 (Standard) or 3 (High Availability)',
            parser=arg_parsers.BoundedInt(1, 3)),
        help="""\
      The number of master nodes in the cluster.

      [format="csv",options="header"]
      |========
      Number of Masters,Cluster Mode
      1,Standard
      3,High Availability
      |========
      """)
    parser.add_argument('--properties',
                        type=arg_parsers.ArgDict(),
                        metavar='PREFIX:PROPERTY=VALUE',
                        default={},
                        help="""\
Specifies configuration properties for installed packages, such as Hadoop
and Spark.

Properties are mapped to configuration files by specifying a prefix, such as
"core:io.serializations". The following are supported prefixes and their
mappings:

[format="csv",options="header"]
|========
Prefix,File,Purpose of file
capacity-scheduler,capacity-scheduler.xml,Hadoop YARN Capacity Scheduler configuration
core,core-site.xml,Hadoop general configuration
distcp,distcp-default.xml,Hadoop Distributed Copy configuration
hadoop-env,hadoop-env.sh,Hadoop specific environment variables
hdfs,hdfs-site.xml,Hadoop HDFS configuration
hive,hive-site.xml,Hive configuration
mapred,mapred-site.xml,Hadoop MapReduce configuration
mapred-env,mapred-env.sh,Hadoop MapReduce specific environment variables
pig,pig.properties,Pig configuration
spark,spark-defaults.conf,Spark configuration
spark-env,spark-env.sh,Spark specific environment variables
yarn,yarn-site.xml,Hadoop YARN configuration
yarn-env,yarn-env.sh,Hadoop YARN specific environment variables
|========

See https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/cluster-properties
for more information.

""")
    parser.add_argument(
        '--service-account',
        help='The Google Cloud IAM service account to be authenticated as.')
    parser.add_argument('--scopes',
                        type=arg_parsers.ArgList(min_length=1),
                        metavar='SCOPE',
                        help="""\
Specifies scopes for the node instances. Multiple SCOPEs can be specified,
separated by commas.
Examples:

  $ {{command}} example-cluster --scopes https://www.googleapis.com/auth/bigtable.admin

  $ {{command}} example-cluster --scopes sqlservice,bigquery

The following *minimum scopes* are necessary for the cluster to function
properly and are always added, even if not explicitly specified:

[format="csv"]
|========
{minimum_scopes}
|========

If the `--scopes` flag is not specified, the following *default scopes*
are also included:

[format="csv"]
|========
{additional_scopes}
|========

If you want to enable all scopes, use the 'cloud-platform' scope.

SCOPE can be either the full URI of the scope or an alias.
Available aliases are:

[format="csv",options="header"]
|========
Alias,URI
{aliases}
|========

{scope_deprecation_msg}
""".format(minimum_scopes='\n'.join(constants.MINIMUM_SCOPE_URIS),
           additional_scopes='\n'.join(
               constants.ADDITIONAL_DEFAULT_SCOPE_URIS),
           aliases=compute_helpers.SCOPE_ALIASES_FOR_HELP,
           scope_deprecation_msg=compute_constants.DEPRECATED_SCOPES_MESSAGES))

    master_boot_disk_size = parser.add_mutually_exclusive_group()
    worker_boot_disk_size = parser.add_mutually_exclusive_group()

    # Deprecated, to be removed at a future date.
    master_boot_disk_size.add_argument(
        '--master-boot-disk-size-gb',
        action=actions.DeprecationAction(
            '--master-boot-disk-size-gb',
            warn=(
                'The `--master-boot-disk-size-gb` flag is deprecated. '
                'Use `--master-boot-disk-size` flag with "GB" after value.')),
        type=int,
        hidden=True,
        help='Use `--master-boot-disk-size` flag with "GB" after value.')
    worker_boot_disk_size.add_argument(
        '--worker-boot-disk-size-gb',
        action=actions.DeprecationAction(
            '--worker-boot-disk-size-gb',
            warn=(
                'The `--worker-boot-disk-size-gb` flag is deprecated. '
                'Use `--worker-boot-disk-size` flag with "GB" after value.')),
        type=int,
        hidden=True,
        help='Use `--worker-boot-disk-size` flag with "GB" after value.')

    boot_disk_size_detailed_help = """\
      The size of the boot disk. The value must be a
      whole number followed by a size unit of ``KB'' for kilobyte, ``MB''
      for megabyte, ``GB'' for gigabyte, or ``TB'' for terabyte. For example,
      ``10GB'' will produce a 10 gigabyte disk. The minimum size a boot disk
      can have is 10 GB. Disk size must be a multiple of 1 GB.
      """
    master_boot_disk_size.add_argument(
        '--master-boot-disk-size',
        type=arg_parsers.BinarySize(lower_bound='10GB'),
        help=boot_disk_size_detailed_help)
    worker_boot_disk_size.add_argument(
        '--worker-boot-disk-size',
        type=arg_parsers.BinarySize(lower_bound='10GB'),
        help=boot_disk_size_detailed_help)
    parser.add_argument('--preemptible-worker-boot-disk-size',
                        type=arg_parsers.BinarySize(lower_bound='10GB'),
                        help=boot_disk_size_detailed_help)

    # Args that are visible only in Beta track
    parser.add_argument('--no-address',
                        action='store_true',
                        help="""\
      If provided, the instances in the cluster will not be assigned external
      IP addresses.

      Note: Dataproc VMs need access to the Dataproc API. This can be achieved
      without external IP addresses using Private Google Access
      (https://cloud.google.com/compute/docs/private-google-access).
      """,
                        hidden=not beta)

    boot_disk_type_detailed_help = """\
      The type of the boot disk. The value must be ``pd-standard'' or
      ``pd-ssd''.
      """
    parser.add_argument('--master-boot-disk-type',
                        help=boot_disk_type_detailed_help)
    parser.add_argument('--worker-boot-disk-type',
                        help=boot_disk_type_detailed_help)
    parser.add_argument('--preemptible-worker-boot-disk-type',
                        help=boot_disk_type_detailed_help)
Example #9
KEY_FORMAT_ERROR = (
    'Only hyphens (-), underscores (_), lowercase characters, and numbers are '
    'allowed. Keys must start with a lowercase character. International '
    'characters are allowed.')
KEY_FORMAT_HELP = (
    'Keys must start with a lowercase character and contain only hyphens '
    '(`-`), underscores (```_```), lowercase characters, and numbers.')

VALUE_FORMAT_ERROR = (
    'Only hyphens (-), underscores (_), lowercase characters, and numbers are '
    'allowed. International characters are allowed.')
VALUE_FORMAT_HELP = (
    'Values must contain only hyphens (`-`), underscores (```_```), lowercase '
    'characters, and numbers.')

KEY_FORMAT_VALIDATOR = arg_parsers.CustomFunctionValidator(
    IsValidLabelKey, KEY_FORMAT_ERROR)

VALUE_FORMAT_VALIDATOR = arg_parsers.CustomFunctionValidator(
    IsValidLabelValue, VALUE_FORMAT_ERROR)


def GetCreateLabelsFlag(extra_message='', labels_name='labels',
                        validate_values=True):
  """Makes the base.Argument for --labels flag."""
  value_type = VALUE_FORMAT_VALIDATOR if validate_values else None
  format_help = [KEY_FORMAT_HELP]
  if validate_values:
    format_help.append(VALUE_FORMAT_HELP)
  help_parts = ['List of label KEY=VALUE pairs to add.',
                ' '.join(format_help)]
  if extra_message:
Example #10
def ArgsForClusterRef(parser, beta=False, include_deprecated=True):  # pylint: disable=unused-argument
    """Register flags for creating a dataproc cluster.

  Args:
    parser: The argparse.ArgParser to configure with dataproc cluster arguments.
    beta: whether or not this is a beta command (may affect flag visibility)
    include_deprecated: whether deprecated flags should be included
  """
    labels_util.AddCreateLabelsFlags(parser)
    instances_flags.AddTagsArgs(parser)
    # 30m is backend timeout + 5m for safety buffer.
    flags.AddTimeoutFlag(parser, default='35m')
    flags.AddZoneFlag(parser, short_flags=include_deprecated)

    parser.add_argument(
        '--metadata',
        type=arg_parsers.ArgDict(min_length=1),
        action='append',
        default=None,
        help=('Metadata to be made available to the guest operating system '
              'running on the instances'),
        metavar='KEY=VALUE')

    # Either allow creating a single node cluster (--single-node), or specifying
    # the number of workers in the multi-node cluster (--num-workers and
    # --num-preemptible-workers)
    node_group = parser.add_argument_group(mutex=True)  # Mutually exclusive
    node_group.add_argument('--single-node',
                            action='store_true',
                            help="""\
      Create a single node cluster.

      A single node cluster has all master and worker components.
      It cannot have any separate worker nodes. If this flag is not
      specified, a cluster with separate workers is created.
      """)
    # Not mutually exclusive
    worker_group = node_group.add_argument_group(
        help='Multi-node cluster flags')
    worker_group.add_argument(
        '--num-workers',
        type=int,
        help='The number of worker nodes in the cluster. Defaults to '
        'server-specified.')
    worker_group.add_argument(
        '--num-preemptible-workers',
        type=int,
        help='The number of preemptible worker nodes in the cluster.')

    parser.add_argument(
        '--master-machine-type',
        help='The type of machine to use for the master. Defaults to '
        'server-specified.')
    parser.add_argument(
        '--worker-machine-type',
        help='The type of machine to use for workers. Defaults to '
        'server-specified.')
    image_parser = parser.add_mutually_exclusive_group()
    # TODO(b/73291743): Add external doc link to --image
    image_parser.add_argument(
        '--image',
        metavar='IMAGE',
        help='The full custom image URI or the custom image name that '
        'will be used to create a cluster.')
    image_parser.add_argument(
        '--image-version',
        metavar='VERSION',
        help='The image version to use for the cluster. Defaults to the '
        'latest version.')
    parser.add_argument(
        '--bucket',
        help='The Google Cloud Storage bucket to use with the Google Cloud '
        'Storage connector. A bucket is auto created when this parameter is '
        'not specified.')

    netparser = parser.add_mutually_exclusive_group()
    netparser.add_argument('--network',
                           help="""\
      The Compute Engine network that the VM instances of the cluster will be
      part of. This is mutually exclusive with --subnet. If neither is
      specified, this defaults to the "default" network.
      """)
    netparser.add_argument('--subnet',
                           help="""\
      Specifies the subnet that the cluster will be part of. This is mutually
      exclusive with --network.
      """)
    parser.add_argument(
        '--num-worker-local-ssds',
        type=int,
        help='The number of local SSDs to attach to each worker in a cluster.')
    parser.add_argument(
        '--num-master-local-ssds',
        type=int,
        help='The number of local SSDs to attach to the master in a cluster.')
    parser.add_argument(
        '--initialization-actions',
        type=arg_parsers.ArgList(min_length=1),
        metavar='CLOUD_STORAGE_URI',
        help=('A list of Google Cloud Storage URIs of '
              'executables to run on each node in the cluster.'))
    parser.add_argument(
        '--initialization-action-timeout',
        type=arg_parsers.Duration(),
        metavar='TIMEOUT',
        default='10m',
        help=('The maximum duration of each initialization action. See '
              '$ gcloud topic datetimes for information on duration formats.'))
    parser.add_argument(
        '--num-masters',
        type=arg_parsers.CustomFunctionValidator(
            lambda n: int(n) in [1, 3],
            'Number of masters must be 1 (Standard) or 3 (High Availability)',
            parser=arg_parsers.BoundedInt(1, 3)),
        help="""\
      The number of master nodes in the cluster.

      Number of Masters | Cluster Mode
      --- | ---
      1 | Standard
      3 | High Availability
      """)
    parser.add_argument('--properties',
                        type=arg_parsers.ArgDict(),
                        metavar='PREFIX:PROPERTY=VALUE',
                        default={},
                        help="""\
Specifies configuration properties for installed packages, such as Hadoop
and Spark.

Properties are mapped to configuration files by specifying a prefix, such as
"core:io.serializations". The following are supported prefixes and their
mappings:

Prefix | File | Purpose of file
--- | --- | ---
capacity-scheduler | capacity-scheduler.xml | Hadoop YARN Capacity Scheduler configuration
core | core-site.xml | Hadoop general configuration
distcp | distcp-default.xml | Hadoop Distributed Copy configuration
hadoop-env | hadoop-env.sh | Hadoop specific environment variables
hdfs | hdfs-site.xml | Hadoop HDFS configuration
hive | hive-site.xml | Hive configuration
mapred | mapred-site.xml | Hadoop MapReduce configuration
mapred-env | mapred-env.sh | Hadoop MapReduce specific environment variables
pig | pig.properties | Pig configuration
spark | spark-defaults.conf | Spark configuration
spark-env | spark-env.sh | Spark specific environment variables
yarn | yarn-site.xml | Hadoop YARN configuration
yarn-env | yarn-env.sh | Hadoop YARN specific environment variables

See https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/cluster-properties
for more information.

""")
    parser.add_argument(
        '--service-account',
        help='The Google Cloud IAM service account to be authenticated as.')
    parser.add_argument('--scopes',
                        type=arg_parsers.ArgList(min_length=1),
                        metavar='SCOPE',
                        help="""\
Specifies scopes for the node instances. Multiple SCOPEs can be specified,
separated by commas.
Examples:

  $ {{command}} example-cluster --scopes https://www.googleapis.com/auth/bigtable.admin

  $ {{command}} example-cluster --scopes sqlservice,bigquery

The following *minimum scopes* are necessary for the cluster to function
properly and are always added, even if not explicitly specified:

  {minimum_scopes}

If the `--scopes` flag is not specified, the following *default scopes*
are also included:

  {additional_scopes}

If you want to enable all scopes, use the 'cloud-platform' scope.

{scopes_help}
""".format(minimum_scopes='\n  '.join(constants.MINIMUM_SCOPE_URIS),
           additional_scopes='\n  '.join(
               constants.ADDITIONAL_DEFAULT_SCOPE_URIS),
           scopes_help=compute_helpers.SCOPES_HELP))

    if include_deprecated:
        _AddDiskArgsDeprecated(parser)
    else:
        _AddDiskArgs(parser)

    # --no-address is an exception to the no negative-flag style guideline to be
    # consistent with gcloud compute instances create --no-address
    parser.add_argument('--no-address',
                        action='store_true',
                        help="""\
      If provided, the instances in the cluster will not be assigned external
      IP addresses.

      If omitted, the instances in the cluster will each be assigned an
      ephemeral external IP address.

      Note: Dataproc VMs need access to the Dataproc API. This can be achieved
      without external IP addresses using Private Google Access
      (https://cloud.google.com/compute/docs/private-google-access).
      """)

    boot_disk_type_detailed_help = """\
      The type of the boot disk. The value must be ``pd-standard'' or
      ``pd-ssd''.
      """
    parser.add_argument('--master-boot-disk-type',
                        help=boot_disk_type_detailed_help)
    parser.add_argument('--worker-boot-disk-type',
                        help=boot_disk_type_detailed_help)
    parser.add_argument('--preemptible-worker-boot-disk-type',
                        help=boot_disk_type_detailed_help)
Example #11
def _IsValidIpv4CidrBlock(ipv4_cidr_block):
  """Validates that IPV4 CIDR block arg has valid format.

  Intended to be used as an argparse validator.

  Args:
    ipv4_cidr_block: str, the IPV4 CIDR block string to validate

  Returns:
    bool, True if and only if the IPV4 CIDR block is valid
  """
  return ipaddress.IPv4Network(ipv4_cidr_block) is not None


IPV4_CIDR_BLOCK_FORMAT_VALIDATOR = arg_parsers.CustomFunctionValidator(
    _IsValidIpv4CidrBlock, _INVALID_IPV4_CIDR_BLOCK_ERROR)

CLUSTER_IPV4_CIDR_FLAG = base.Argument(
    '--cluster-ipv4-cidr',
    default=None,
    type=IPV4_CIDR_BLOCK_FORMAT_VALIDATOR,
    help="""\
    IP address range for the pods in this cluster in CIDR notation
    (e.g. 10.0.0.0/14).

    Cannot be specified unless '--enable-ip-alias' is also specified.
    """)

SERVICES_IPV4_CIDR_FLAG = base.Argument(
    '--services-ipv4-cidr',
    default=None,
Example #12
def _IsValidEnvVarName(name):
  """Validates that a user-provided arg is a valid environment variable name.

  Intended to be used as an argparse validator.

  Args:
    name: str, the environment variable name to validate

  Returns:
    bool, True if and only if the name is valid
  """
  return re.match('^[a-zA-Z_][a-zA-Z0-9_]*$', name) is not None


ENV_VAR_NAME_FORMAT_VALIDATOR = arg_parsers.CustomFunctionValidator(
    _IsValidEnvVarName, _ENV_VAR_NAME_ERROR)
CREATE_ENV_VARS_FLAG = base.Argument(
    '--env-variables',
    metavar='NAME=VALUE',
    type=arg_parsers.ArgDict(
        key_type=ENV_VAR_NAME_FORMAT_VALIDATOR, value_type=str),
    action=arg_parsers.UpdateAction,
    help='A comma-delimited list of environment variable `NAME=VALUE` '
    'pairs to provide to the Airflow scheduler, worker, and webserver '
    'processes. NAME may contain upper and lowercase letters, digits, '
    'and underscores, but they may not begin with a digit. '
    'To include commas as part of a `VALUE`, see `{top_command} topics'
    ' escaping` for information about overriding the delimiter.')
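
Here the key-name validator is supplied as the `key_type` of an ArgDict, so each NAME in a NAME=VALUE pair is checked individually. A rough standalone equivalent of that per-key check with the standard library (a sketch only; it does not reproduce ArgDict's comma-delimited parsing):

import argparse
import re

def _EnvVarPair(pair):
  """Parses one NAME=VALUE pair, validating NAME like the flag above."""
  name, sep, value = pair.partition('=')
  if not sep:
    raise argparse.ArgumentTypeError('expected NAME=VALUE, got %r' % pair)
  if re.match('^[a-zA-Z_][a-zA-Z0-9_]*$', name) is None:
    raise argparse.ArgumentTypeError('invalid environment variable name: %r' % name)
  return name, value

parser = argparse.ArgumentParser()
parser.add_argument('--env-variables', type=_EnvVarPair, action='append')
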


def IsValidUserPort(val):
Example #13
def _GetQueueTypeArgValidator():
    return arg_parsers.CustomFunctionValidator(
        lambda k: k in constants.VALID_QUEUE_TYPES,
        'Only the following queue types are valid: [{}].'.format(', '.join(
            constants.VALID_QUEUE_TYPES)))
Example #14
def _IsValidIpv4CidrBlock(ipv4_cidr_block):
    """Validates that IPV4 CIDR block arg has valid format.

  Intended to be used as an argparse validator.

  Args:
    ipv4_cidr_block: str, the IPV4 CIDR block string to validate

  Returns:
    bool, True if and only if the IPV4 CIDR block is valid
  """
    return ipaddress.IPv4Network(ipv4_cidr_block) is not None


IPV4_CIDR_BLOCK_FORMAT_VALIDATOR = arg_parsers.CustomFunctionValidator(
    _IsValidIpv4CidrBlock, _INVALID_IPV4_CIDR_BLOCK_ERROR)

CLUSTER_IPV4_CIDR_FLAG = base.Argument('--cluster-ipv4-cidr',
                                       default=None,
                                       type=IPV4_CIDR_BLOCK_FORMAT_VALIDATOR,
                                       help="""\
    IP address range for the pods in this cluster in CIDR notation
    (e.g. 10.0.0.0/14).

    Cannot be specified unless '--enable-ip-alias' is also specified.
    """)

SERVICES_IPV4_CIDR_FLAG = base.Argument('--services-ipv4-cidr',
                                        default=None,
                                        type=IPV4_CIDR_BLOCK_FORMAT_VALIDATOR,
                                        help="""\