Example #1
def create_cluster(cmd, client, cluster_name, resource_group_name, cluster_type,
                   location=None, tags=None, no_wait=False, cluster_version='default', cluster_tier=None,
                   cluster_configurations=None, component_version=None,
                   headnode_size='large', workernode_size='large', zookeepernode_size=None, edgenode_size=None,
                   workernode_count=3, workernode_data_disks_per_node=None,
                   workernode_data_disk_storage_account_type=None, workernode_data_disk_size=None,
                   http_username=None, http_password=None,
                   ssh_username='sshuser', ssh_password=None, ssh_public_key=None,
                   storage_account=None, storage_account_key=None,
                   storage_default_container=None, storage_default_filesystem=None,
                   storage_account_managed_identity=None,
                   vnet_name=None, subnet=None,
                   domain=None, ldaps_urls=None,
                   cluster_admin_account=None, cluster_admin_password=None,
                   cluster_users_group_dns=None,
                   assign_identity=None,
                   encryption_vault_uri=None, encryption_key_name=None, encryption_key_version=None,
                   encryption_algorithm='RSA-OAEP', esp=False, no_validation_timeout=False):
    from .util import build_identities_info, build_virtual_network_profile, parse_domain_name, \
        get_storage_account_endpoint, validate_esp_cluster_create_params
    from azure.mgmt.hdinsight.models import ClusterCreateParametersExtended, ClusterCreateProperties, OSType, \
        ClusterDefinition, ComputeProfile, HardwareProfile, Role, OsProfile, LinuxOperatingSystemProfile, \
        StorageProfile, StorageAccount, DataDisksGroups, SecurityProfile, \
        DirectoryType, DiskEncryptionProperties, Tier, SshProfile, SshPublicKey

    validate_esp_cluster_create_params(esp, cluster_name, resource_group_name, cluster_type,
                                       subnet, domain, cluster_admin_account, assign_identity,
                                       ldaps_urls, cluster_admin_password, cluster_users_group_dns)

    if esp:
        if cluster_tier == Tier.standard:
            raise CLIError('Cluster tier cannot be {} when --esp is specified. '
                           'Please use default value or specify {} explicitly.'.format(Tier.standard, Tier.premium))
        if not cluster_tier:
            cluster_tier = Tier.premium

    # Update optional parameters with defaults
    location = location or _get_rg_location(cmd.cli_ctx, resource_group_name)

    # Format dictionary/free-form arguments
    if cluster_configurations:
        import json
        try:
            cluster_configurations = json.loads(cluster_configurations)
        except ValueError as ex:
            raise CLIError('The cluster_configurations argument must be valid JSON. Error: {}'.format(str(ex)))
    else:
        cluster_configurations = dict()
    if component_version:
        # See validator
        component_version = {c: v for c, v in [version.split('=') for version in component_version]}

    # Validate whether HTTP credentials were provided
    if 'gateway' in cluster_configurations:
        gateway_config = cluster_configurations['gateway']
    else:
        gateway_config = dict()
    if http_username and 'restAuthCredential.username' in gateway_config:
        raise CLIError('An HTTP username must be specified either as a command-line parameter '
                       'or in the cluster configuration, but not both.')
    if not http_username:
        http_username = 'admin'  # Implement default logic here, in case a user specifies the username in configurations

    if not http_password:
        try:
            http_password = prompt_pass('HTTP password for the cluster:', confirm=True)
        except NoTTYException:
            raise CLIError('Please specify --http-password in non-interactive mode.')

    # Update the cluster config with the HTTP credentials
    gateway_config['restAuthCredential.isEnabled'] = 'true'  # HTTP credentials are required
    http_username = http_username or gateway_config['restAuthCredential.username']
    gateway_config['restAuthCredential.username'] = http_username
    gateway_config['restAuthCredential.password'] = http_password
    cluster_configurations['gateway'] = gateway_config

    # Validate whether SSH credentials were provided
    if not (ssh_password or ssh_public_key):
        logger.warning("SSH credentials not specified. Using the HTTP password as the SSH password.")
        ssh_password = http_password

    # Validate storage arguments from the user
    if storage_default_container and storage_default_filesystem:
        raise CLIError('Either the default container or the default filesystem can be specified, but not both.')

    # Retrieve primary blob service endpoint
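    # A storage managed identity implies ADLS Gen2; without one, assume WASB (blob) storage.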
    is_wasb = not storage_account_managed_identity
    storage_account_endpoint = None
    if storage_account:
        storage_account_endpoint = get_storage_account_endpoint(cmd, storage_account, is_wasb)

    # Attempt to infer the storage account key from the endpoint
    if not storage_account_key and storage_account and is_wasb:
        from .util import get_key_for_storage_account
        logger.info('Storage account key not specified. Attempting to retrieve key...')
        key = get_key_for_storage_account(cmd, storage_account)
        if not key:
            raise CLIError('Storage account key could not be inferred from storage account.')
        storage_account_key = key

    # Attempt to provide a default container for WASB storage accounts
    if not storage_default_container and storage_account and is_wasb:
        storage_default_container = cluster_name.lower()
        logger.warning('Default WASB container not specified, using "%s".', storage_default_container)
    elif not storage_default_filesystem and not is_wasb:
        storage_default_filesystem = cluster_name.lower()
        logger.warning('Default ADLS file system not specified, using "%s".', storage_default_filesystem)

    # Validate storage info parameters
    if is_wasb and not _all_or_none(storage_account, storage_account_key, storage_default_container):
        raise CLIError('If storage details are specified, the storage account, storage account key, '
                       'and the default container must be specified.')
    if not is_wasb and not _all_or_none(storage_account, storage_default_filesystem):
        raise CLIError('If storage details are specified, the storage account, '
                       'and the default filesystem must be specified.')

    # Validate disk encryption parameters
    if not _all_or_none(encryption_vault_uri, encryption_key_name, encryption_key_version):
        raise CLIError('Either the encryption vault URI, key name and key version should be specified, '
                       'or none of them should be.')

    # Specify virtual network profile only when network arguments are provided
    virtual_network_profile = subnet and build_virtual_network_profile(subnet)

    # Validate data disk parameters
    if not workernode_data_disks_per_node and workernode_data_disk_storage_account_type:
        raise CLIError("Cannot define data disk storage account type unless disks per node is defined.")
    if not workernode_data_disks_per_node and workernode_data_disk_size:
        raise CLIError("Cannot define data disk size unless disks per node is defined.")
    # Specify data disk groups only when disk arguments are provided
    workernode_data_disk_groups = workernode_data_disks_per_node and [
        DataDisksGroups(
            disks_per_node=workernode_data_disks_per_node,
            storage_account_type=workernode_data_disk_storage_account_type,
            disk_size_gb=workernode_data_disk_size
        )
    ]

    os_profile = OsProfile(
        linux_operating_system_profile=LinuxOperatingSystemProfile(
            username=ssh_username,
            password=ssh_password,
            ssh_profile=ssh_public_key and SshProfile(
                public_keys=[SshPublicKey(
                    certificate_data=ssh_public_key
                )]
            )
        )
    )

    roles = [
        # Required roles
        Role(
            name="headnode",
            target_instance_count=2,
            hardware_profile=HardwareProfile(vm_size=headnode_size),
            os_profile=os_profile,
            virtual_network_profile=virtual_network_profile
        ),
        Role(
            name="workernode",
            target_instance_count=workernode_count,
            hardware_profile=HardwareProfile(vm_size=workernode_size),
            os_profile=os_profile,
            virtual_network_profile=virtual_network_profile,
            data_disks_groups=workernode_data_disk_groups
        )
    ]
    if zookeepernode_size:
        roles.append(
            Role(
                name="zookeepernode",
                target_instance_count=3,
                hardware_profile=HardwareProfile(vm_size=zookeepernode_size),
                os_profile=os_profile,
                virtual_network_profile=virtual_network_profile
            ))
    if edgenode_size:
        roles.append(
            Role(
                name="edgenode",
                target_instance_count=1,
                hardware_profile=HardwareProfile(vm_size=edgenode_size),
                os_profile=os_profile,
                virtual_network_profile=virtual_network_profile
            ))

    storage_accounts = []
    if storage_account:
        # Specify storage account details only when storage arguments are provided
        storage_accounts.append(
            StorageAccount(
                name=storage_account_endpoint,
                key=storage_account_key,
                container=storage_default_container,
                file_system=storage_default_filesystem,
                resource_id=None if is_wasb else storage_account,
                msi_resource_id=storage_account_managed_identity,
                is_default=True
            )
        )

    additional_storage_accounts = []  # TODO: Add support for additional storage accounts
    if additional_storage_accounts:
        storage_accounts += [
            StorageAccount(
                name=s.storage_account_endpoint,
                key=s.storage_account_key,
                container=s.container,
                is_default=False
            )
            for s in additional_storage_accounts
        ]

    assign_identities = []
    if assign_identity:
        assign_identities.append(assign_identity)

    if storage_account_managed_identity:
        assign_identities.append(storage_account_managed_identity)

    cluster_identity = build_identities_info(assign_identities) if assign_identities else None

    domain_name = domain and parse_domain_name(domain)
    if not ldaps_urls and domain_name:
        ldaps_urls = ['ldaps://{}:636'.format(domain_name)]

    security_profile = domain and SecurityProfile(
        directory_type=DirectoryType.active_directory,
        domain=domain_name,
        ldaps_urls=ldaps_urls,
        domain_username=cluster_admin_account,
        domain_user_password=cluster_admin_password,
        cluster_users_group_dns=cluster_users_group_dns,
        aadds_resource_id=domain,
        msi_resource_id=assign_identity
    )

    disk_encryption_properties = encryption_vault_uri and DiskEncryptionProperties(
        vault_uri=encryption_vault_uri,
        key_name=encryption_key_name,
        key_version=encryption_key_version,
        encryption_algorithm=encryption_algorithm,
        msi_resource_id=assign_identity
    )

    create_params = ClusterCreateParametersExtended(
        location=location,
        tags=tags,
        properties=ClusterCreateProperties(
            cluster_version=cluster_version,
            os_type=OSType.linux,
            tier=cluster_tier,
            cluster_definition=ClusterDefinition(
                kind=cluster_type,
                configurations=cluster_configurations,
                component_version=component_version
            ),
            compute_profile=ComputeProfile(
                roles=roles
            ),
            storage_profile=StorageProfile(
                storageaccounts=storage_accounts
            ),
            security_profile=security_profile,
            disk_encryption_properties=disk_encryption_properties
        ),
        identity=cluster_identity
    )

    if no_wait:
        return sdk_no_wait(no_wait, client.create, resource_group_name, cluster_name, create_params)

    return client.create(resource_group_name, cluster_name, create_params)
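
These excerpts assume module-level plumbing from the Azure CLI codebase that is not shown: CLIError (knack.util), prompt_pass and NoTTYException (knack.prompting), a module logger, sdk_no_wait (azure.cli.core.util), and local helpers such as _get_rg_location and _all_or_none. The all-or-nothing validator is small enough to sketch; the version below is an assumption consistent with how it is called above:

def _all_or_none(*params):
    # True when the related arguments are either all supplied or all omitted;
    # the callers above raise CLIError on a partial set.
    return all(params) or not any(params)

assert _all_or_none('vault-uri', 'key-name', 'key-version')  # all supplied
assert _all_or_none(None, None, None)                        # none supplied
assert not _all_or_none('vault-uri', None, None)             # partial set
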
Example #2
def create_cluster(cmd,
                   client,
                   cluster_name,
                   resource_group_name,
                   cluster_type,
                   location=None,
                   tags=None,
                   no_wait=False,
                   cluster_version='default',
                   cluster_tier=None,
                   cluster_configurations=None,
                   component_version=None,
                   headnode_size=None,
                   workernode_size=None,
                   zookeepernode_size=None,
                   edgenode_size=None,
                   kafka_management_node_size=None,
                   kafka_management_node_count=2,
                   kafka_client_group_id=None,
                   kafka_client_group_name=None,
                   workernode_count=3,
                   workernode_data_disks_per_node=None,
                   workernode_data_disk_storage_account_type=None,
                   workernode_data_disk_size=None,
                   http_username=None,
                   http_password=None,
                   ssh_username='sshuser',
                   ssh_password=None,
                   ssh_public_key=None,
                   storage_account=None,
                   storage_account_key=None,
                   storage_default_container=None,
                   storage_default_filesystem=None,
                   storage_account_managed_identity=None,
                   vnet_name=None,
                   subnet=None,
                   domain=None,
                   ldaps_urls=None,
                   cluster_admin_account=None,
                   cluster_admin_password=None,
                   cluster_users_group_dns=None,
                   assign_identity=None,
                   minimal_tls_version=None,
                   encryption_vault_uri=None,
                   encryption_key_name=None,
                   encryption_key_version=None,
                   encryption_algorithm='RSA-OAEP',
                   encryption_in_transit=None,
                   autoscale_type=None,
                   autoscale_min_workernode_count=None,
                   autoscale_max_workernode_count=None,
                   timezone=None,
                   days=None,
                   time=None,
                   autoscale_workernode_count=None,
                   encryption_at_host=None,
                   esp=False,
                   idbroker=False,
                   resource_provider_connection=None,
                   enable_private_link=None,
                   enable_compute_isolation=None,
                   host_sku=None,
                   no_validation_timeout=False):
    from .util import build_identities_info, build_virtual_network_profile, parse_domain_name, \
        get_storage_account_endpoint, validate_esp_cluster_create_params, set_vm_size
    from azure.mgmt.hdinsight.models import ClusterCreateParametersExtended, ClusterCreateProperties, OSType, \
        ClusterDefinition, ComputeProfile, HardwareProfile, Role, OsProfile, LinuxOperatingSystemProfile, \
        StorageProfile, StorageAccount, DataDisksGroups, SecurityProfile, \
        DirectoryType, DiskEncryptionProperties, Tier, SshProfile, SshPublicKey, \
        KafkaRestProperties, ClientGroupInfo, EncryptionInTransitProperties, \
        Autoscale, AutoscaleCapacity, AutoscaleRecurrence, AutoscaleSchedule, AutoscaleTimeAndCapacity, \
        NetworkProperties, PrivateLink, ComputeIsolationProperties

    validate_esp_cluster_create_params(esp, cluster_name, resource_group_name,
                                       cluster_type, subnet, domain,
                                       cluster_admin_account, assign_identity,
                                       ldaps_urls, cluster_admin_password,
                                       cluster_users_group_dns)

    if esp:
        if cluster_tier == Tier.standard:
            raise CLIError(
                'Cluster tier cannot be {} when --esp is specified. '
                'Please use default value or specify {} explicitly.'.format(
                    Tier.standard, Tier.premium))
        if not cluster_tier:
            cluster_tier = Tier.premium

    # Update optional parameters with defaults
    location = location or _get_rg_location(cmd.cli_ctx, resource_group_name)

    # Format dictionary/free-form arguments
    if not cluster_configurations:
        cluster_configurations = dict()

    if component_version:
        # See validator
        component_version = {
            c: v
            for c, v in [version.split('=') for version in component_version]
        }

    # Validate whether HTTP credentials were provided
    if 'gateway' in cluster_configurations:
        gateway_config = cluster_configurations['gateway']
    else:
        gateway_config = dict()
    if http_username and 'restAuthCredential.username' in gateway_config:
        raise CLIError(
            'An HTTP username must be specified either as a command-line parameter '
            'or in the cluster configuration, but not both.')
    if not http_username:
        http_username = 'admin'  # Implement default logic here, in case a user specifies the username in configurations

    if not http_password:
        try:
            http_password = prompt_pass('HTTP password for the cluster:',
                                        confirm=True)
        except NoTTYException:
            raise CLIError(
                'Please specify --http-password in non-interactive mode.')

    # Update the cluster config with the HTTP credentials
    gateway_config['restAuthCredential.isEnabled'] = 'true'  # HTTP credentials are required
    http_username = http_username or gateway_config['restAuthCredential.username']
    gateway_config['restAuthCredential.username'] = http_username
    gateway_config['restAuthCredential.password'] = http_password
    cluster_configurations['gateway'] = gateway_config

    # Validate whether SSH credentials were provided
    if not (ssh_password or ssh_public_key):
        logger.warning(
            "SSH credentials not specified. Using the HTTP password as the SSH password."
        )
        ssh_password = http_password

    # Validate storage arguments from the user
    if storage_default_container and storage_default_filesystem:
        raise CLIError(
            'Either the default container or the default filesystem can be specified, but not both.'
        )

    # Retrieve primary blob service endpoint
    is_wasb = not storage_account_managed_identity
    storage_account_endpoint = None
    if storage_account:
        storage_account_endpoint = get_storage_account_endpoint(
            cmd, storage_account, is_wasb)

    # Attempt to infer the storage account key from the endpoint
    if not storage_account_key and storage_account and is_wasb:
        from .util import get_key_for_storage_account
        logger.info(
            'Storage account key not specified. Attempting to retrieve key...')
        key = get_key_for_storage_account(cmd, storage_account)
        if not key:
            raise CLIError(
                'Storage account key could not be inferred from storage account.'
            )
        storage_account_key = key

    # Attempt to provide a default container for WASB storage accounts
    if not storage_default_container and storage_account and is_wasb:
        storage_default_container = cluster_name.lower()
        logger.warning('Default WASB container not specified, using "%s".',
                       storage_default_container)
    elif not storage_default_filesystem and not is_wasb:
        storage_default_filesystem = cluster_name.lower()
        logger.warning('Default ADLS file system not specified, using "%s".',
                       storage_default_filesystem)

    # Validate storage info parameters
    if is_wasb and not _all_or_none(storage_account, storage_account_key,
                                    storage_default_container):
        raise CLIError(
            'If storage details are specified, the storage account, storage account key, '
            'and the default container must be specified.')
    if not is_wasb and not _all_or_none(storage_account,
                                        storage_default_filesystem):
        raise CLIError(
            'If storage details are specified, the storage account, '
            'and the default filesystem must be specified.')

    # Validate disk encryption parameters
    if not _all_or_none(encryption_vault_uri, encryption_key_name,
                        encryption_key_version):
        raise CLIError(
            'Either the encryption vault URI, key name and key version should be specified, '
            'or none of them should be.')

    # Validate kafka rest proxy parameters
    if not _all_or_none(kafka_client_group_id, kafka_client_group_name):
        raise CLIError(
            'Either both the kafka client group id and the kafka client group name should be specified, '
            'or neither should be.')

    # Validate and initialize autoscale setting
    autoscale_configuration = None
    load_based_type = "Load"
    schedule_based_type = "Schedule"
    if autoscale_type and autoscale_type.lower() == load_based_type.lower():
        if not all([autoscale_min_workernode_count, autoscale_max_workernode_count]):
            raise CLIError(
                'When the --autoscale-type is Load, '
                'both --autoscale-min-workernode-count and --autoscale-max-workernode-count should be specified.'
            )

        autoscale_configuration = Autoscale(capacity=AutoscaleCapacity(
            min_instance_count=autoscale_min_workernode_count,
            max_instance_count=autoscale_max_workernode_count))
    elif autoscale_type and autoscale_type.lower() == schedule_based_type.lower():
        if not all([timezone, days, time, autoscale_workernode_count]):
            raise CLIError(
                'When the --autoscale-type is Schedule, all of the --timezone, --days, --time, '
                '--autoscale-workernode-count should be specified.')

        autoscale_configuration = Autoscale(recurrence=AutoscaleRecurrence(
            time_zone=timezone,
            schedule=[
                AutoscaleSchedule(
                    days=days,
                    time_and_capacity=AutoscaleTimeAndCapacity(
                        time=time,
                        min_instance_count=autoscale_workernode_count,
                        max_instance_count=autoscale_workernode_count))
            ]))

    # Specify virtual network profile only when network arguments are provided
    virtual_network_profile = subnet and build_virtual_network_profile(subnet)

    # Validate data disk parameters
    if not workernode_data_disks_per_node and workernode_data_disk_storage_account_type:
        raise CLIError(
            "Cannot define data disk storage account type unless disks per node is defined."
        )
    if not workernode_data_disks_per_node and workernode_data_disk_size:
        raise CLIError(
            "Cannot define data disk size unless disks per node is defined.")
    # Specify data disk groups only when disk arguments are provided
    workernode_data_disk_groups = workernode_data_disks_per_node and [
        DataDisksGroups(
            disks_per_node=workernode_data_disks_per_node,
            storage_account_type=workernode_data_disk_storage_account_type,
            disk_size_gb=workernode_data_disk_size)
    ]

    # Call the default VM size API to set any sizes the customer did not provide
    if not (workernode_size and headnode_size):
        headnode_size, workernode_size = set_vm_size(cmd.cli_ctx, location,
                                                     cluster_type,
                                                     headnode_size,
                                                     workernode_size)

    if not headnode_size:
        raise RequiredArgumentMissingError(
            'Please specify --headnode-size explicitly.')
    if not workernode_size:
        raise RequiredArgumentMissingError(
            'Please specify --workernode-size explicitly.')

    os_profile = OsProfile(
        linux_operating_system_profile=LinuxOperatingSystemProfile(
            username=ssh_username,
            password=ssh_password,
            ssh_profile=ssh_public_key and SshProfile(
                public_keys=[SshPublicKey(certificate_data=ssh_public_key)])))

    roles = [
        # Required roles
        Role(name="headnode",
             target_instance_count=2,
             hardware_profile=HardwareProfile(vm_size=headnode_size),
             os_profile=os_profile,
             virtual_network_profile=virtual_network_profile),
        Role(name="workernode",
             target_instance_count=workernode_count,
             hardware_profile=HardwareProfile(vm_size=workernode_size),
             os_profile=os_profile,
             virtual_network_profile=virtual_network_profile,
             data_disks_groups=workernode_data_disk_groups,
             autoscale_configuration=autoscale_configuration)
    ]

    if zookeepernode_size:
        roles.append(
            Role(name="zookeepernode",
                 target_instance_count=3,
                 hardware_profile=HardwareProfile(vm_size=zookeepernode_size),
                 os_profile=os_profile,
                 virtual_network_profile=virtual_network_profile))
    if edgenode_size:
        roles.append(
            Role(name="edgenode",
                 target_instance_count=1,
                 hardware_profile=HardwareProfile(vm_size=edgenode_size),
                 os_profile=os_profile,
                 virtual_network_profile=virtual_network_profile))
    if kafka_management_node_size:
        # generate kafkaRestProperties
        roles.append(
            Role(name="kafkamanagementnode",
                 target_instance_count=kafka_management_node_count,
                 hardware_profile=HardwareProfile(
                     vm_size=kafka_management_node_size),
                 os_profile=os_profile,
                 virtual_network_profile=virtual_network_profile))

    # HDInsight ID Broker (HIB) nodes are only added for ESP clusters
    if esp and idbroker:
        roles.append(
            Role(name="idbrokernode",
                 target_instance_count=2,
                 virtual_network_profile=virtual_network_profile))

    storage_accounts = []
    if storage_account:
        # Specify storage account details only when storage arguments are provided
        storage_accounts.append(
            StorageAccount(name=storage_account_endpoint,
                           key=storage_account_key,
                           container=storage_default_container,
                           file_system=storage_default_filesystem,
                           resource_id=storage_account,
                           msi_resource_id=storage_account_managed_identity,
                           is_default=True))

    additional_storage_accounts = []  # TODO: Add support for additional storage accounts
    if additional_storage_accounts:
        storage_accounts += [
            StorageAccount(name=s.storage_account_endpoint,
                           key=s.storage_account_key,
                           container=s.container,
                           is_default=False)
            for s in additional_storage_accounts
        ]

    assign_identities = []
    if assign_identity:
        assign_identities.append(assign_identity)

    if storage_account_managed_identity:
        assign_identities.append(storage_account_managed_identity)

    cluster_identity = build_identities_info(
        assign_identities) if assign_identities else None

    domain_name = domain and parse_domain_name(domain)
    if not ldaps_urls and domain_name:
        ldaps_urls = ['ldaps://{}:636'.format(domain_name)]

    security_profile = domain and SecurityProfile(
        directory_type=DirectoryType.active_directory,
        domain=domain_name,
        ldaps_urls=ldaps_urls,
        domain_username=cluster_admin_account,
        domain_user_password=cluster_admin_password,
        cluster_users_group_dns=cluster_users_group_dns,
        aadds_resource_id=domain,
        msi_resource_id=assign_identity)

    disk_encryption_properties = encryption_vault_uri and DiskEncryptionProperties(
        vault_uri=encryption_vault_uri,
        key_name=encryption_key_name,
        key_version=encryption_key_version,
        encryption_algorithm=encryption_algorithm,
        msi_resource_id=assign_identity)

    if encryption_at_host:
        if disk_encryption_properties:
            disk_encryption_properties.encryption_at_host = encryption_at_host
        else:
            disk_encryption_properties = DiskEncryptionProperties(
                encryption_at_host=encryption_at_host)

    kafka_rest_properties = (kafka_client_group_id and kafka_client_group_name) and KafkaRestProperties(
        client_group_info=ClientGroupInfo(group_id=kafka_client_group_id,
                                          group_name=kafka_client_group_name))

    encryption_in_transit_properties = encryption_in_transit and EncryptionInTransitProperties(
        is_encryption_in_transit_enabled=encryption_in_transit)

    # relay outbound and private link
    network_properties = (resource_provider_connection or enable_private_link) and NetworkProperties(
        resource_provider_connection=resource_provider_connection,
        private_link=PrivateLink.enabled if enable_private_link is True else PrivateLink.disabled)

    # compute isolation
    compute_isolation_properties = enable_compute_isolation and ComputeIsolationProperties(
        enable_compute_isolation=enable_compute_isolation, host_sku=host_sku)

    create_params = ClusterCreateParametersExtended(
        location=location,
        tags=tags,
        properties=ClusterCreateProperties(
            cluster_version=cluster_version,
            os_type=OSType.linux,
            tier=cluster_tier,
            cluster_definition=ClusterDefinition(
                kind=cluster_type,
                configurations=cluster_configurations,
                component_version=component_version),
            compute_profile=ComputeProfile(roles=roles),
            storage_profile=StorageProfile(storageaccounts=storage_accounts),
            security_profile=security_profile,
            disk_encryption_properties=disk_encryption_properties,
            kafka_rest_properties=kafka_rest_properties,
            min_supported_tls_version=minimal_tls_version,
            encryption_in_transit_properties=encryption_in_transit_properties,
            network_properties=network_properties,
            compute_isolation_properties=compute_isolation_properties),
        identity=cluster_identity)

    if no_wait:
        return sdk_no_wait(no_wait, client.create, resource_group_name,
                           cluster_name, create_params)

    return client.create(resource_group_name, cluster_name, create_params)
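
This variant additionally assumes RequiredArgumentMissingError from azure.cli.core.azclierror. Its autoscale handling builds one of two shapes: a capacity-only Autoscale for load-based scaling, or a recurrence whose min and max instance counts are equal for schedule-based scaling. A standalone sketch using the same models (the timezone, days, and counts are illustrative):

from azure.mgmt.hdinsight.models import (Autoscale, AutoscaleCapacity,
                                         AutoscaleRecurrence, AutoscaleSchedule,
                                         AutoscaleTimeAndCapacity)

# Load-based: the service scales workers between the two bounds.
load_based = Autoscale(capacity=AutoscaleCapacity(min_instance_count=3,
                                                  max_instance_count=10))

# Schedule-based: at 09:00 on weekdays the worker count is pinned to 5, so
# min_instance_count == max_instance_count, as in the code above.
schedule_based = Autoscale(recurrence=AutoscaleRecurrence(
    time_zone='Pacific Standard Time',
    schedule=[AutoscaleSchedule(
        days=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday'],
        time_and_capacity=AutoscaleTimeAndCapacity(time='09:00',
                                                   min_instance_count=5,
                                                   max_instance_count=5))]))
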
Example #3
example_cluster_params: ClusterCreateProperties = ClusterCreateProperties(
    cluster_version="3.6",
    os_type=OSType.linux,
    tier=Tier.standard,
    cluster_definition=ClusterDefinition(
        kind="spark",
        configurations={
            "gateway": {
                "restAuthCredential.enabled_credential": "True",
                "restAuthCredential.username": "******",
                "restAuthCredential.password": "******"
            }
        }),
    compute_profile=ComputeProfile(roles=[
        Role(name="headnode",
             target_instance_count=2,
             hardware_profile=HardwareProfile(vm_size="Large"),
             os_profile=OsProfile(
                 linux_operating_system_profile=LinuxOperatingSystemProfile(
                     username="******", password="******"))),
        Role(name="workernode",
             target_instance_count=1,
             hardware_profile=HardwareProfile(vm_size="Large"),
             os_profile=OsProfile(
                 linux_operating_system_profile=LinuxOperatingSystemProfile(
                     username="******", password="******")))
    ]),
    storage_profile=StorageProfile(storageaccounts=[
        StorageAccount(name="storage_account",
                       key="storage_account_key",
                       container="container",
                       is_default=True)
    ]))
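
A ClusterCreateProperties object like the one above still has to be wrapped in the extended parameters and submitted. A sketch following the clusters.create pattern from Example #5, where hdinsight_client stands in for an HDInsightManagementClient and the resource names are placeholders:

from azure.mgmt.hdinsight.models import ClusterCreateParametersExtended

create_params = ClusterCreateParametersExtended(location='westus2',
                                                tags={},
                                                properties=example_cluster_params)
# clusters.create returns a long-running-operation poller; result() blocks
# until provisioning finishes.
poller = hdinsight_client.clusters.create('my-resource-group', 'my-cluster',
                                          create_params)
cluster = poller.result()
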
Example #4
def create_cluster(cmd,
                   client,
                   cluster_name,
                   resource_group_name,
                   location=None,
                   tags=None,
                   no_wait=False,
                   cluster_version='default',
                   cluster_type='spark',
                   cluster_tier=None,
                   cluster_configurations=None,
                   component_version=None,
                   headnode_size='large',
                   workernode_size='large',
                   zookeepernode_size=None,
                   edgenode_size=None,
                   workernode_count=3,
                   workernode_data_disks_per_node=None,
                   workernode_data_disk_storage_account_type=None,
                   workernode_data_disk_size=None,
                   http_username=None,
                   http_password=None,
                   ssh_username='sshuser',
                   ssh_password=None,
                   ssh_public_key=None,
                   storage_account=None,
                   storage_account_key=None,
                   storage_default_container=None,
                   storage_default_filesystem=None,
                   virtual_network=None,
                   subnet_name=None):
    from azure.mgmt.hdinsight.models import ClusterCreateParametersExtended, ClusterCreateProperties, OSType, \
        ClusterDefinition, ComputeProfile, HardwareProfile, Role, OsProfile, LinuxOperatingSystemProfile, \
        StorageProfile, StorageAccount, VirtualNetworkProfile, DataDisksGroups

    # Update optional parameters with defaults
    additional_storage_accounts = []  # TODO: Add support for additional storage accounts
    location = location or _get_rg_location(cmd.cli_ctx, resource_group_name)

    # Format dictionary/free-form arguments
    if cluster_configurations:
        import json
        try:
            cluster_configurations = json.loads(cluster_configurations)
        except ValueError as ex:
            raise CLIError(
                'The cluster_configurations argument must be valid JSON. Error: {}'
                .format(str(ex)))
    else:
        cluster_configurations = dict()
    if component_version:
        # See validator
        component_version = {
            c: v
            for c, v in [version.split('=') for version in component_version]
        }

    # Validate whether HTTP credentials were provided
    if 'gateway' in cluster_configurations:
        gateway_config = cluster_configurations['gateway']
    else:
        gateway_config = dict()
    if http_username and 'restAuthCredential.username' in gateway_config:
        raise CLIError(
            'An HTTP username must be specified either as a command-line parameter '
            'or in the cluster configuration, but not both.')
    if not http_username:
        http_username = 'admin'  # Implement default logic here, in case a user specifies the username in configurations
    is_password_in_cluster_config = 'restAuthCredential.password' in gateway_config
    if http_password and is_password_in_cluster_config:
        raise CLIError(
            'An HTTP password must be specified either as a command-line parameter '
            'or in the cluster configuration, but not both.')
    if not (http_password or is_password_in_cluster_config):
        raise CLIError('An HTTP password is required.')

    # Update the cluster config with the HTTP credentials
    gateway_config['restAuthCredential.isEnabled'] = 'true'  # HTTP credentials are required
    http_username = http_username or gateway_config['restAuthCredential.username']
    gateway_config['restAuthCredential.username'] = http_username
    http_password = http_password or gateway_config['restAuthCredential.password']
    gateway_config['restAuthCredential.password'] = http_password
    cluster_configurations['gateway'] = gateway_config

    # Validate whether SSH credentials were provided
    if not (ssh_password or ssh_public_key):
        logger.warning(
            "SSH credentials not specified. Using the HTTP password as the SSH password."
        )
        ssh_password = http_password

    # Validate storage arguments from the user
    if storage_default_container and storage_default_filesystem:
        raise CLIError(
            'Either the default container or the default filesystem can be specified, but not both.'
        )

    # Attempt to infer the storage account key from the endpoint
    if not storage_account_key and storage_account:
        from .util import get_key_for_storage_account
        logger.info(
            'Storage account key not specified. Attempting to retrieve key...')
        key = get_key_for_storage_account(cmd, storage_account,
                                          resource_group_name)
        if not key:
            logger.warning(
                'Storage account key could not be inferred from storage account.'
            )
        else:
            storage_account_key = key

    # Attempt to provide a default container for WASB storage accounts
    if not storage_default_container and storage_account and _is_wasb_endpoint(
            storage_account):
        storage_default_container = cluster_name
        logger.warning('Default WASB container not specified, using "%s".',
                       storage_default_container)

    # Validate storage info parameters
    if not _all_or_none(
            storage_account, storage_account_key,
        (storage_default_container or storage_default_filesystem)):
        raise CLIError(
            'If storage details are specified, the storage account, storage account key, '
            'and either the default container or default filesystem must be specified.'
        )

    # Validate network profile parameters
    if not _all_or_none(virtual_network, subnet_name):
        raise CLIError(
            'Either both the virtual network and subnet should be specified, or neither should be.'
        )
    # Specify virtual network profile only when network arguments are provided
    virtual_network_profile = virtual_network and VirtualNetworkProfile(
        id=virtual_network, subnet=subnet_name)

    # Validate data disk parameters
    if not workernode_data_disks_per_node and workernode_data_disk_storage_account_type:
        raise CLIError(
            "Cannot define data disk storage account type unless disks per node is defined."
        )
    if not workernode_data_disks_per_node and workernode_data_disk_size:
        raise CLIError(
            "Cannot define data disk size unless disks per node is defined.")
    # Specify data disk groups only when disk arguments are provided
    workernode_data_disk_groups = workernode_data_disks_per_node and [
        DataDisksGroups(
            disks_per_node=workernode_data_disks_per_node,
            storage_account_type=workernode_data_disk_storage_account_type,
            disk_size_gb=workernode_data_disk_size)
    ]

    os_profile = OsProfile(
        linux_operating_system_profile=LinuxOperatingSystemProfile(
            username=ssh_username,
            password=ssh_password,
            ssh_public_key=ssh_public_key))

    roles = [
        # Required roles
        Role(name="headnode",
             target_instance_count=2,
             hardware_profile=HardwareProfile(vm_size=headnode_size),
             os_profile=os_profile,
             virtual_network_profile=virtual_network_profile),
        Role(name="workernode",
             target_instance_count=workernode_count,
             hardware_profile=HardwareProfile(vm_size=workernode_size),
             os_profile=os_profile,
             virtual_network_profile=virtual_network_profile,
             data_disks_groups=workernode_data_disk_groups)
    ]
    if zookeepernode_size:
        roles.append(
            Role(name="zookeepernode",
                 target_instance_count=3,
                 hardware_profile=HardwareProfile(vm_size=zookeepernode_size),
                 os_profile=os_profile,
                 virtual_network_profile=virtual_network_profile))
    if edgenode_size:
        roles.append(
            Role(name="edgenode",
                 target_instance_count=1,
                 hardware_profile=HardwareProfile(vm_size=edgenode_size),
                 os_profile=os_profile,
                 virtual_network_profile=virtual_network_profile))

    storage_accounts = []
    if storage_account:
        # Specify storage account details only when storage arguments are provided
        storage_accounts.append(
            StorageAccount(name=storage_account,
                           key=storage_account_key,
                           container=storage_default_container,
                           file_system=storage_default_filesystem,
                           is_default=True))
    if additional_storage_accounts:
        storage_accounts += [
            StorageAccount(name=s.storage_account,
                           key=s.storage_account_key,
                           container=s.container,
                           is_default=False)
            for s in additional_storage_accounts
        ]

    create_params = ClusterCreateParametersExtended(
        location=location,
        tags=tags,
        properties=ClusterCreateProperties(
            cluster_version=cluster_version,
            os_type=OSType.linux,
            tier=cluster_tier,
            cluster_definition=ClusterDefinition(
                kind=cluster_type,
                configurations=cluster_configurations,
                component_version=component_version),
            compute_profile=ComputeProfile(roles=roles),
            storage_profile=StorageProfile(storageaccounts=storage_accounts)))

    if no_wait:
        return sdk_no_wait(no_wait, client.create, resource_group_name,
                           cluster_name, create_params)

    return client.create(resource_group_name, cluster_name, create_params)
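
Unlike Examples #1 and #2, this older variant decides between WASB and ADLS by inspecting the storage account endpoint through an _is_wasb_endpoint helper that is not shown. A hypothetical sketch of that check, assuming accounts are passed as blob endpoints such as 'mystorage.blob.core.windows.net':

def _is_wasb_endpoint(storage_account):
    # Hypothetical: treat the account as WASB when it names a blob-service
    # endpoint; anything else (e.g. an ADLS endpoint) is not WASB.
    return '.blob.core' in storage_account

assert _is_wasb_endpoint('mystorage.blob.core.windows.net')
assert not _is_wasb_endpoint('mystorage.azuredatalakestore.net')
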
Example #5
    def start(self):
        """
        Make the cluster operational in DSS, creating an actual cluster if necessary.
        
        :returns: a tuple of : 
                  * the settings needed to access hadoop/hive/impala/spark on the cluster. If not
                    specified, then the corresponding element (hadoop/hive/impala/spark) is not overriden
                  * an dict of data to pass to to other methods when handling the cluster created
        """
        logging.info("Init cluster for HDI")

        create_params = ClusterCreateParametersExtended(
            location=self.location,
            tags={},
            properties=ClusterCreateProperties(
                #TODO: parametrize this correctly
                cluster_version="3.6",
                os_type=OSType.linux,
                tier=Tier.standard,
                cluster_definition=ClusterDefinition(
                    kind="spark",
                    configurations={
                        "gateway": {
                            "restAuthCredential.isEnabled": "true",
                            "restAuthCredential.username": self.gateway_username,
                            "restAuthCredential.password": self.gateway_password
                        }
                    }),
                compute_profile=ComputeProfile(roles=[
                    Role(name="headnode",
                         target_instance_count=2,
                         hardware_profile=HardwareProfile(
                             vm_size=self.headnode_size),
                         os_profile=OsProfile(
                             linux_operating_system_profile=LinuxOperatingSystemProfile(
                                 username=self.ssh_username,
                                 password=self.ssh_password)),
                         virtual_network_profile=self.vnet_profile),
                    Role(name="workernode",
                         target_instance_count=self.worker_count,
                         hardware_profile=HardwareProfile(
                             vm_size=self.worker_size),
                         os_profile=OsProfile(
                             linux_operating_system_profile=LinuxOperatingSystemProfile(
                                 username=self.ssh_username,
                                 password=self.ssh_password)),
                         virtual_network_profile=self.vnet_profile)
                ]),
                storage_profile=StorageProfile(storageaccounts=[
                    StorageAccount(name=self.storage_account_name,
                                   key=self.storage_account_key,
                                   container=self.storage_account_container,
                                   is_default=True)
                ])))

        logging.info('Creating Cluster ....')
        create_poller = self.hdi_client.clusters.create(
            self.resource_group_name, self.hdi_cluster_name, create_params)
        logging.info('Waiting for result poller...')
        try:
            cluster = create_poller.result()
        except Exception:
            logging.error(
                'Cluster creation failed, deleting what was provisioned')
            try:
                self.hdi_client.clusters.delete(self.resource_group_name,
                                                self.hdi_cluster_name)
            except Exception:
                logging.error('Could not delete provisioned resources')
            raise

        logging.info('Poller returned {}'.format(pformat(cluster)))

        try:
            dss_cluster_config = dku_hdi.make_cluster_keys_and_data(
                self.aad_client_credentials, self.subscription_id,
                self.hdi_cluster_name, self.resource_group_name)
        except Exception:
            logging.error('Could not attach to created cluster, deleting')
            try:
                self.hdi_client.clusters.delete(self.resource_group_name,
                                                self.hdi_cluster_name)
            except Exception:
                logging.error('Could not delete created cluster')
            raise

        return dss_cluster_config
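
The create-then-roll-back pattern used twice above generalizes to a small helper. A minimal sketch, where the action and rollback callables stand in for the SDK calls:

import logging

def run_or_rollback(action, rollback, failure_message):
    """Run action(); on failure, attempt rollback(), then re-raise."""
    try:
        return action()
    except Exception:
        logging.error(failure_message)
        try:
            rollback()
        except Exception:
            logging.error('Rollback failed')
        raise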