def create_cluster(cmd, client, cluster_name, resource_group_name, cluster_type,
                   location=None, tags=None, no_wait=False, cluster_version='default', cluster_tier=None,
                   cluster_configurations=None, component_version=None,
                   headnode_size='large', workernode_size='large', zookeepernode_size=None, edgenode_size=None,
                   workernode_count=3, workernode_data_disks_per_node=None,
                   workernode_data_disk_storage_account_type=None, workernode_data_disk_size=None,
                   http_username=None, http_password=None,
                   ssh_username='******', ssh_password=None, ssh_public_key=None,
                   storage_account=None, storage_account_key=None,
                   storage_default_container=None, storage_default_filesystem=None,
                   storage_account_managed_identity=None,
                   vnet_name=None, subnet=None,
                   domain=None, ldaps_urls=None,
                   cluster_admin_account=None, cluster_admin_password=None,
                   cluster_users_group_dns=None,
                   assign_identity=None,
                   encryption_vault_uri=None, encryption_key_name=None, encryption_key_version=None,
                   encryption_algorithm='RSA-OAEP',
                   esp=False, no_validation_timeout=False):
    """Assemble an HDInsight cluster creation request from the command arguments and submit it.

    The function validates argument combinations, fills in defaults (location from the
    resource group, HTTP user name, SSH password from the HTTP password, default storage
    container / file system from the lower-cased cluster name), builds the role, storage,
    security and disk-encryption models and finally calls ``client.create``.

    :param cmd: Command context; ``cmd.cli_ctx`` is used for the resource-group location lookup
        and ``cmd`` itself is handed to the storage helpers.
    :param client: Object exposing ``create(resource_group_name, cluster_name, parameters)``.
    :param cluster_configurations: JSON string with extra cluster configurations. Its ``gateway``
        section is completed with the HTTP credentials.
    :param component_version: Iterable of ``component=version`` strings.
    :param http_username: HTTP user. When omitted, ``restAuthCredential.username`` from the gateway
        configuration is used; the built-in default applies only if that is missing as well.
    :param http_password: HTTP password; prompted for when omitted.
    :param storage_account_managed_identity: When set, the storage account is handled as
        file-system based (non-WASB) storage and the identity is attached to the cluster.
    :param esp: When true, the ``standard`` tier is rejected and the tier defaults to ``premium``.
    :param no_wait: When true, the call is dispatched through ``sdk_no_wait``.
    :param vnet_name: Accepted for interface compatibility; not read by this function.
    :param no_validation_timeout: Accepted for interface compatibility; not read by this function.
    :return: The value returned by ``sdk_no_wait`` (``no_wait`` set) or by ``client.create``.
    :raises CLIError: On invalid JSON, conflicting or incomplete HTTP/storage/encryption/data-disk
        arguments, a ``standard`` tier combined with ``esp``, a missing HTTP password in
        non-interactive mode, or a storage key that cannot be retrieved.
    """
    from .util import build_identities_info, build_virtual_network_profile, parse_domain_name, \
        get_storage_account_endpoint, validate_esp_cluster_create_params
    from azure.mgmt.hdinsight.models import ClusterCreateParametersExtended, ClusterCreateProperties, OSType, \
        ClusterDefinition, ComputeProfile, HardwareProfile, Role, OsProfile, LinuxOperatingSystemProfile, \
        StorageProfile, StorageAccount, DataDisksGroups, SecurityProfile, \
        DirectoryType, DiskEncryptionProperties, Tier, SshProfile, SshPublicKey

    validate_esp_cluster_create_params(esp, cluster_name, resource_group_name, cluster_type,
                                       subnet, domain, cluster_admin_account, assign_identity,
                                       ldaps_urls, cluster_admin_password, cluster_users_group_dns)

    if esp:
        if cluster_tier == Tier.standard:
            raise CLIError('Cluster tier cannot be {} when --esp is specified. '
                           'Please use default value or specify {} explicitly.'.format(Tier.standard, Tier.premium))
        if not cluster_tier:
            cluster_tier = Tier.premium

    # Update optional parameters with defaults
    location = location or _get_rg_location(cmd.cli_ctx, resource_group_name)

    # Format dictionary/free-form arguments
    if cluster_configurations:
        import json
        try:
            cluster_configurations = json.loads(cluster_configurations)
        except ValueError as ex:
            raise CLIError('The cluster_configurations argument must be valid JSON. Error: {}'.format(str(ex)))
    else:
        cluster_configurations = dict()
    if component_version:
        # See validator
        component_version = {c: v for c, v in [version.split('=') for version in component_version]}

    # Validate whether HTTP credentials were provided
    if 'gateway' in cluster_configurations:
        gateway_config = cluster_configurations['gateway']
    else:
        gateway_config = dict()
    if http_username and 'restAuthCredential.username' in gateway_config:
        raise CLIError('An HTTP username must be specified either as a command-line parameter '
                       'or in the cluster configuration, but not both.')
    if not http_username:
        # A user name given in the cluster configuration wins over the built-in default;
        # previously the default silently replaced it.
        http_username = gateway_config.get('restAuthCredential.username') or '******'
    if not http_password:
        try:
            http_password = prompt_pass('HTTP password for the cluster:', confirm=True)
        except NoTTYException:
            raise CLIError('Please specify --http-password in non-interactive mode.')

    # Update the cluster config with the HTTP credentials
    gateway_config['restAuthCredential.isEnabled'] = 'true'  # HTTP credentials are required
    gateway_config['restAuthCredential.username'] = http_username
    gateway_config['restAuthCredential.password'] = http_password
    cluster_configurations['gateway'] = gateway_config

    # Validate whether SSH credentials were provided
    if not (ssh_password or ssh_public_key):
        logger.warning("SSH credentials not specified. Using the HTTP password as the SSH password.")
        ssh_password = http_password

    # Validate storage arguments from the user
    if storage_default_container and storage_default_filesystem:
        raise CLIError('Either the default container or the default filesystem can be specified, but not both.')

    # Retrieve primary blob service endpoint
    is_wasb = not storage_account_managed_identity
    storage_account_endpoint = None
    if storage_account:
        storage_account_endpoint = get_storage_account_endpoint(cmd, storage_account, is_wasb)

    # Attempt to infer the storage account key from the endpoint
    if not storage_account_key and storage_account and is_wasb:
        from .util import get_key_for_storage_account
        logger.info('Storage account key not specified. Attempting to retrieve key...')
        key = get_key_for_storage_account(cmd, storage_account)
        if not key:
            raise CLIError('Storage account key could not be inferred from storage account.')
        storage_account_key = key

    # Attempt to provide a default container for WASB storage accounts.
    # Only do so when a storage account was actually given; otherwise the all-or-none
    # validation below would reject a cluster created without any storage arguments.
    if not storage_default_container and storage_account and is_wasb:
        storage_default_container = cluster_name.lower()
        logger.warning('Default WASB container not specified, using "%s".', storage_default_container)
    elif not storage_default_filesystem and not is_wasb:
        storage_default_filesystem = cluster_name.lower()
        logger.warning('Default ADLS file system not specified, using "%s".', storage_default_filesystem)

    # Validate storage info parameters
    if is_wasb and not _all_or_none(storage_account, storage_account_key, storage_default_container):
        raise CLIError('If storage details are specified, the storage account, storage account key, '
                       'and the default container must be specified.')
    if not is_wasb and not _all_or_none(storage_account, storage_default_filesystem):
        raise CLIError('If storage details are specified, the storage account, '
                       'and the default filesystem must be specified.')

    # Validate disk encryption parameters
    if not _all_or_none(encryption_vault_uri, encryption_key_name, encryption_key_version):
        raise CLIError('Either the encryption vault URI, key name and key version should be specified, '
                       'or none of them should be.')

    # Specify virtual network profile only when network arguments are provided
    virtual_network_profile = subnet and build_virtual_network_profile(subnet)

    # Validate data disk parameters
    if not workernode_data_disks_per_node and workernode_data_disk_storage_account_type:
        raise CLIError("Cannot define data disk storage account type unless disks per node is defined.")
    if not workernode_data_disks_per_node and workernode_data_disk_size:
        raise CLIError("Cannot define data disk size unless disks per node is defined.")
    # Specify data disk groups only when disk arguments are provided
    workernode_data_disk_groups = workernode_data_disks_per_node and [
        DataDisksGroups(
            disks_per_node=workernode_data_disks_per_node,
            storage_account_type=workernode_data_disk_storage_account_type,
            disk_size_gb=workernode_data_disk_size
        )
    ]

    # One OS profile is shared by every role of the cluster
    os_profile = OsProfile(
        linux_operating_system_profile=LinuxOperatingSystemProfile(
            username=ssh_username,
            password=ssh_password,
            ssh_profile=ssh_public_key and SshProfile(
                public_keys=[SshPublicKey(
                    certificate_data=ssh_public_key
                )]
            )
        )
    )

    roles = [
        # Required roles
        Role(
            name="headnode",
            target_instance_count=2,
            hardware_profile=HardwareProfile(vm_size=headnode_size),
            os_profile=os_profile,
            virtual_network_profile=virtual_network_profile
        ),
        Role(
            name="workernode",
            target_instance_count=workernode_count,
            hardware_profile=HardwareProfile(vm_size=workernode_size),
            os_profile=os_profile,
            virtual_network_profile=virtual_network_profile,
            data_disks_groups=workernode_data_disk_groups
        )
    ]
    # Optional roles are added only when a size was requested for them
    if zookeepernode_size:
        roles.append(
            Role(
                name="zookeepernode",
                target_instance_count=3,
                hardware_profile=HardwareProfile(vm_size=zookeepernode_size),
                os_profile=os_profile,
                virtual_network_profile=virtual_network_profile
            ))
    if edgenode_size:
        roles.append(
            Role(
                name="edgenode",
                target_instance_count=1,
                hardware_profile=HardwareProfile(vm_size=edgenode_size),
                os_profile=os_profile,
                virtual_network_profile=virtual_network_profile
            ))

    storage_accounts = []
    if storage_account:
        # Specify storage account details only when storage arguments are provided
        storage_accounts.append(
            StorageAccount(
                name=storage_account_endpoint,
                key=storage_account_key,
                container=storage_default_container,
                file_system=storage_default_filesystem,
                resource_id=None if is_wasb else storage_account,
                msi_resource_id=storage_account_managed_identity,
                is_default=True
            )
        )
    additional_storage_accounts = []  # TODO: Add support for additional storage accounts
    if additional_storage_accounts:
        storage_accounts += [
            StorageAccount(
                name=s.storage_account_endpoint,
                key=s.storage_account_key,
                container=s.container,
                is_default=False
            )
            for s in additional_storage_accounts
        ]

    # Both the explicitly assigned identity and the storage identity are attached to the cluster
    assign_identities = []
    if assign_identity:
        assign_identities.append(assign_identity)

    if storage_account_managed_identity:
        assign_identities.append(storage_account_managed_identity)

    cluster_identity = build_identities_info(assign_identities) if assign_identities else None

    domain_name = domain and parse_domain_name(domain)
    if not ldaps_urls and domain_name:
        ldaps_urls = ['ldaps://{}:636'.format(domain_name)]

    security_profile = domain and SecurityProfile(
        directory_type=DirectoryType.active_directory,
        domain=domain_name,
        ldaps_urls=ldaps_urls,
        domain_username=cluster_admin_account,
        domain_user_password=cluster_admin_password,
        cluster_users_group_dns=cluster_users_group_dns,
        aadds_resource_id=domain,
        msi_resource_id=assign_identity
    )

    disk_encryption_properties = encryption_vault_uri and DiskEncryptionProperties(
        vault_uri=encryption_vault_uri,
        key_name=encryption_key_name,
        key_version=encryption_key_version,
        encryption_algorithm=encryption_algorithm,
        msi_resource_id=assign_identity
    )

    create_params = ClusterCreateParametersExtended(
        location=location,
        tags=tags,
        properties=ClusterCreateProperties(
            cluster_version=cluster_version,
            os_type=OSType.linux,
            tier=cluster_tier,
            cluster_definition=ClusterDefinition(
                kind=cluster_type,
                configurations=cluster_configurations,
                component_version=component_version
            ),
            compute_profile=ComputeProfile(
                roles=roles
            ),
            storage_profile=StorageProfile(
                storageaccounts=storage_accounts
            ),
            security_profile=security_profile,
            disk_encryption_properties=disk_encryption_properties
        ),
        identity=cluster_identity
    )

    if no_wait:
        return sdk_no_wait(no_wait, client.create, resource_group_name, cluster_name, create_params)

    return client.create(resource_group_name, cluster_name, create_params)
def create_hdi_application(cmd, client, resource_group_name, cluster_name, application_name,
                           script_uri, script_action_name, script_parameters=None, edgenode_size='Standard_D3_V2',
                           ssh_username='******', ssh_password=None, ssh_public_key=None,
                           marketplace_identifier=None, application_type='CustomApplication', tags=None,
                           https_endpoint_access_mode=None, https_endpoint_location=None,
                           https_endpoint_destination_port=8080, https_endpoint_public_port=443,
                           sub_domain_suffix=None, disable_gateway_auth=None,
                           ssh_endpoint_location=None, ssh_endpoint_destination_port=22, ssh_endpoint_public_port=22,
                           vnet_name=None, subnet=None, no_validation_timeout=False):
    """Build an HDInsight application definition with a single edge node and submit it.

    The application consists of one ``edgenode`` role, one install script action that targets
    that role, and optional HTTPS / SSH endpoints.

    :param cmd: Command context; not read by this function.
    :param client: Object exposing ``create(resource_group_name, cluster_name, application_name, parameters)``.
    :param script_uri: URI of the install script action.
    :param script_action_name: Name of the install script action.
    :param script_parameters: Parameters passed to the install script action.
    :param ssh_password: SSH password for the edge node. An OS profile is attached only when a
        password or a public key is supplied.
    :param ssh_public_key: SSH public key for the edge node; sent as an SSH profile.
    :param https_endpoint_access_mode: Must be given together with ``https_endpoint_location``.
    :param https_endpoint_location: When set, an HTTPS endpoint is added to the application.
    :param ssh_endpoint_location: When set, an SSH endpoint is added to the application.
    :param subnet: When set, a virtual network profile built from it is attached to the edge node.
    :param vnet_name: Accepted for interface compatibility; not read by this function.
    :param no_validation_timeout: Accepted for interface compatibility; not read by this function.
    :return: The value returned by ``client.create``.
    :raises CLIError: If only one of the HTTPS endpoint location and access mode is specified.
    """
    from .util import build_virtual_network_profile
    from azure.mgmt.hdinsight.models import Application, ApplicationProperties, ComputeProfile, RuntimeScriptAction, \
        Role, LinuxOperatingSystemProfile, HardwareProfile, \
        ApplicationGetHttpsEndpoint, ApplicationGetEndpoint, OsProfile, SshProfile, SshPublicKey

    # Specify virtual network profile only when network arguments are provided
    virtual_network_profile = subnet and build_virtual_network_profile(subnet)

    # The public key has to travel inside an SshProfile, exactly as the cluster creation
    # command does; a bare ``ssh_public_key`` keyword is not the form used elsewhere in this file.
    os_profile = (ssh_password or ssh_public_key) and OsProfile(
        linux_operating_system_profile=LinuxOperatingSystemProfile(
            username=ssh_username,
            password=ssh_password,
            ssh_profile=ssh_public_key and SshProfile(
                public_keys=[SshPublicKey(
                    certificate_data=ssh_public_key
                )]
            )
        )
    )

    roles = [
        Role(
            name="edgenode",
            target_instance_count=1,
            hardware_profile=HardwareProfile(vm_size=edgenode_size),
            os_profile=os_profile,
            virtual_network_profile=virtual_network_profile
        )
    ]

    # Validate network profile parameters
    if not _all_or_none(https_endpoint_access_mode, https_endpoint_location):
        raise CLIError('Either both the https endpoint location and access mode should be specified, '
                       'or neither should be.')

    https_endpoints = []
    if https_endpoint_location:
        https_endpoints.append(
            ApplicationGetHttpsEndpoint(
                access_modes=[https_endpoint_access_mode],
                location=https_endpoint_location,
                destination_port=https_endpoint_destination_port,
                public_port=https_endpoint_public_port,
                sub_domain_suffix=sub_domain_suffix,
                disable_gateway_auth=disable_gateway_auth
            )
        )

    ssh_endpoints = []
    if ssh_endpoint_location:
        ssh_endpoints.append(
            ApplicationGetEndpoint(
                location=ssh_endpoint_location,
                destination_port=ssh_endpoint_destination_port,
                public_port=ssh_endpoint_public_port
            )
        )

    application_properties = ApplicationProperties(
        compute_profile=ComputeProfile(
            roles=roles
        ),
        install_script_actions=[
            RuntimeScriptAction(
                name=script_action_name,
                uri=script_uri,
                parameters=script_parameters,
                # The script runs on every role defined for the application
                roles=[role.name for role in roles]
            )
        ],
        https_endpoints=https_endpoints,
        ssh_endpoints=ssh_endpoints,
        application_type=application_type,
        marketplace_identifier=marketplace_identifier,
    )

    create_params = Application(
        tags=tags,
        properties=application_properties
    )

    return client.create(resource_group_name, cluster_name, application_name, create_params)
def create_cluster(cmd, client, cluster_name, resource_group_name, cluster_type,
                   location=None, tags=None, no_wait=False, cluster_version='default', cluster_tier=None,
                   cluster_configurations=None, component_version=None,
                   headnode_size=None, workernode_size=None, zookeepernode_size=None, edgenode_size=None,
                   kafka_management_node_size=None, kafka_management_node_count=2,
                   kafka_client_group_id=None, kafka_client_group_name=None,
                   workernode_count=3, workernode_data_disks_per_node=None,
                   workernode_data_disk_storage_account_type=None, workernode_data_disk_size=None,
                   http_username=None, http_password=None,
                   ssh_username='******', ssh_password=None, ssh_public_key=None,
                   storage_account=None, storage_account_key=None,
                   storage_default_container=None, storage_default_filesystem=None,
                   storage_account_managed_identity=None,
                   vnet_name=None, subnet=None,
                   domain=None, ldaps_urls=None,
                   cluster_admin_account=None, cluster_admin_password=None,
                   cluster_users_group_dns=None,
                   assign_identity=None, minimal_tls_version=None,
                   encryption_vault_uri=None, encryption_key_name=None, encryption_key_version=None,
                   encryption_algorithm='RSA-OAEP', encryption_in_transit=None,
                   autoscale_type=None, autoscale_min_workernode_count=None, autoscale_max_workernode_count=None,
                   timezone=None, days=None, time=None, autoscale_workernode_count=None,
                   encryption_at_host=None, esp=False, idbroker=False,
                   resource_provider_connection=None, enable_private_link=None,
                   enable_compute_isolation=None, host_sku=None, no_validation_timeout=False):
    """Assemble an HDInsight cluster creation request from the command arguments and submit it.

    The function validates argument combinations, fills in defaults (location from the
    resource group, HTTP user name, SSH password from the HTTP password, default storage
    container / file system from the lower-cased cluster name, VM sizes via ``set_vm_size``),
    builds the role, storage, security, encryption, autoscale, Kafka REST, network and compute
    isolation models and finally calls ``client.create``.

    :param cmd: Command context; ``cmd.cli_ctx`` is used for the resource-group location and VM
        size lookups and ``cmd`` itself is handed to the storage helpers.
    :param client: Object exposing ``create(resource_group_name, cluster_name, parameters)``.
    :param cluster_configurations: Mapping with extra cluster configurations (used with ``in`` and
        item assignment here). Its ``gateway`` section is completed with the HTTP credentials.
    :param component_version: Iterable of ``component=version`` strings.
    :param headnode_size: Head node VM size; resolved through ``set_vm_size`` when this or
        ``workernode_size`` is missing.
    :param workernode_size: Worker node VM size; see ``headnode_size``.
    :param kafka_management_node_size: When set, a ``kafkamanagementnode`` role is added.
    :param kafka_client_group_id: Must be given together with ``kafka_client_group_name``.
    :param http_username: HTTP user. When omitted, ``restAuthCredential.username`` from the gateway
        configuration is used; the built-in default applies only if that is missing as well.
    :param http_password: HTTP password; prompted for when omitted.
    :param storage_account_managed_identity: When set, the storage account is handled as
        file-system based (non-WASB) storage and the identity is attached to the cluster.
    :param autoscale_type: ``Load`` or ``Schedule`` (case-insensitive); any other value leaves
        autoscale unconfigured.
    :param esp: When true, the ``standard`` tier is rejected and the tier defaults to ``premium``.
    :param idbroker: Together with ``esp`` adds an ``idbrokernode`` role.
    :param no_wait: When true, the call is dispatched through ``sdk_no_wait``.
    :param vnet_name: Accepted for interface compatibility; not read by this function.
    :param no_validation_timeout: Accepted for interface compatibility; not read by this function.
    :return: The value returned by ``sdk_no_wait`` (``no_wait`` set) or by ``client.create``.
    :raises CLIError: On conflicting or incomplete HTTP/storage/encryption/Kafka/autoscale/data-disk
        arguments, a ``standard`` tier combined with ``esp``, a missing HTTP password in
        non-interactive mode, or a storage key that cannot be retrieved.
    :raises RequiredArgumentMissingError: If a head or worker node size is still unknown after
        the ``set_vm_size`` lookup.
    """
    from .util import build_identities_info, build_virtual_network_profile, parse_domain_name, \
        get_storage_account_endpoint, validate_esp_cluster_create_params, set_vm_size
    from azure.mgmt.hdinsight.models import ClusterCreateParametersExtended, ClusterCreateProperties, OSType, \
        ClusterDefinition, ComputeProfile, HardwareProfile, Role, OsProfile, LinuxOperatingSystemProfile, \
        StorageProfile, StorageAccount, DataDisksGroups, SecurityProfile, \
        DirectoryType, DiskEncryptionProperties, Tier, SshProfile, SshPublicKey, \
        KafkaRestProperties, ClientGroupInfo, EncryptionInTransitProperties, \
        Autoscale, AutoscaleCapacity, AutoscaleRecurrence, AutoscaleSchedule, AutoscaleTimeAndCapacity, \
        NetworkProperties, PrivateLink, ComputeIsolationProperties

    validate_esp_cluster_create_params(esp, cluster_name, resource_group_name, cluster_type,
                                       subnet, domain, cluster_admin_account, assign_identity,
                                       ldaps_urls, cluster_admin_password, cluster_users_group_dns)

    if esp:
        if cluster_tier == Tier.standard:
            raise CLIError(
                'Cluster tier cannot be {} when --esp is specified. '
                'Please use default value or specify {} explicitly.'.format(
                    Tier.standard, Tier.premium))
        if not cluster_tier:
            cluster_tier = Tier.premium

    # Update optional parameters with defaults
    location = location or _get_rg_location(cmd.cli_ctx, resource_group_name)

    # Format dictionary/free-form arguments
    if not cluster_configurations:
        cluster_configurations = dict()

    if component_version:
        # See validator
        component_version = {
            c: v
            for c, v in
            [version.split('=') for version in component_version]
        }

    # Validate whether HTTP credentials were provided
    if 'gateway' in cluster_configurations:
        gateway_config = cluster_configurations['gateway']
    else:
        gateway_config = dict()
    if http_username and 'restAuthCredential.username' in gateway_config:
        raise CLIError(
            'An HTTP username must be specified either as a command-line parameter '
            'or in the cluster configuration, but not both.')
    if not http_username:
        # A user name given in the cluster configuration wins over the built-in default;
        # previously the default silently replaced it.
        http_username = gateway_config.get('restAuthCredential.username') or '******'
    if not http_password:
        try:
            http_password = prompt_pass('HTTP password for the cluster:',
                                        confirm=True)
        except NoTTYException:
            raise CLIError(
                'Please specify --http-password in non-interactive mode.')

    # Update the cluster config with the HTTP credentials
    gateway_config['restAuthCredential.isEnabled'] = 'true'  # HTTP credentials are required
    gateway_config['restAuthCredential.username'] = http_username
    gateway_config['restAuthCredential.password'] = http_password
    cluster_configurations['gateway'] = gateway_config

    # Validate whether SSH credentials were provided
    if not (ssh_password or ssh_public_key):
        logger.warning(
            "SSH credentials not specified. Using the HTTP password as the SSH password."
        )
        ssh_password = http_password

    # Validate storage arguments from the user
    if storage_default_container and storage_default_filesystem:
        raise CLIError(
            'Either the default container or the default filesystem can be specified, but not both.'
        )

    # Retrieve primary blob service endpoint
    is_wasb = not storage_account_managed_identity
    storage_account_endpoint = None
    if storage_account:
        storage_account_endpoint = get_storage_account_endpoint(
            cmd, storage_account, is_wasb)

    # Attempt to infer the storage account key from the endpoint
    if not storage_account_key and storage_account and is_wasb:
        from .util import get_key_for_storage_account
        logger.info(
            'Storage account key not specified. Attempting to retrieve key...')
        key = get_key_for_storage_account(cmd, storage_account)
        if not key:
            raise CLIError(
                'Storage account key could not be inferred from storage account.'
            )
        storage_account_key = key

    # Attempt to provide a default container for WASB storage accounts
    if not storage_default_container and storage_account and is_wasb:
        storage_default_container = cluster_name.lower()
        logger.warning('Default WASB container not specified, using "%s".',
                       storage_default_container)
    elif not storage_default_filesystem and not is_wasb:
        storage_default_filesystem = cluster_name.lower()
        logger.warning('Default ADLS file system not specified, using "%s".',
                       storage_default_filesystem)

    # Validate storage info parameters
    if is_wasb and not _all_or_none(storage_account, storage_account_key,
                                    storage_default_container):
        raise CLIError(
            'If storage details are specified, the storage account, storage account key, '
            'and the default container must be specified.')
    if not is_wasb and not _all_or_none(storage_account,
                                        storage_default_filesystem):
        raise CLIError(
            'If storage details are specified, the storage account, '
            'and the default filesystem must be specified.')

    # Validate disk encryption parameters
    if not _all_or_none(encryption_vault_uri, encryption_key_name,
                        encryption_key_version):
        raise CLIError(
            'Either the encryption vault URI, key name and key version should be specified, '
            'or none of them should be.')

    # Validate kafka rest proxy parameters
    if not _all_or_none(kafka_client_group_id, kafka_client_group_name):
        raise CLIError(
            'Either the kafka client group id and kafka client group name should be specified, '
            'or none of them should be')

    # Validate and initialize autoscale setting
    autoscale_configuration = None
    load_based_type = "Load"
    schedule_based_type = "Schedule"
    if autoscale_type and autoscale_type.lower() == load_based_type.lower():
        if not all(
                [autoscale_min_workernode_count, autoscale_max_workernode_count]):
            raise CLIError(
                'When the --autoscale-type is Load, '
                'both --autoscale-min-workernode-count and --autoscale-max-workernode-count should be specified.'
            )
        autoscale_configuration = Autoscale(capacity=AutoscaleCapacity(
            min_instance_count=autoscale_min_workernode_count,
            max_instance_count=autoscale_max_workernode_count))
    elif autoscale_type and autoscale_type.lower() == schedule_based_type.lower():
        if not all([timezone, days, time, autoscale_workernode_count]):
            raise CLIError(
                'When the --autoscale-type is Schedule, all of the --timezone, --days, --time, '
                '--autoscale-workernode-count should be specified.')
        # A schedule entry pins the worker count: min and max carry the same value
        autoscale_configuration = Autoscale(recurrence=AutoscaleRecurrence(
            time_zone=timezone,
            schedule=[
                AutoscaleSchedule(
                    days=days,
                    time_and_capacity=AutoscaleTimeAndCapacity(
                        time=time,
                        min_instance_count=autoscale_workernode_count,
                        max_instance_count=autoscale_workernode_count))
            ]))

    # Specify virtual network profile only when network arguments are provided
    virtual_network_profile = subnet and build_virtual_network_profile(subnet)

    # Validate data disk parameters
    if not workernode_data_disks_per_node and workernode_data_disk_storage_account_type:
        raise CLIError(
            "Cannot define data disk storage account type unless disks per node is defined."
        )
    if not workernode_data_disks_per_node and workernode_data_disk_size:
        raise CLIError(
            "Cannot define data disk size unless disks per node is defined.")
    # Specify data disk groups only when disk arguments are provided
    workernode_data_disk_groups = workernode_data_disks_per_node and [
        DataDisksGroups(
            disks_per_node=workernode_data_disks_per_node,
            storage_account_type=workernode_data_disk_storage_account_type,
            disk_size_gb=workernode_data_disk_size)
    ]

    # call get default vm size api to set vm size if customer does not provide the value
    if not (workernode_size and headnode_size):
        headnode_size, workernode_size = set_vm_size(cmd.cli_ctx, location,
                                                     cluster_type,
                                                     headnode_size,
                                                     workernode_size)

    if not headnode_size:
        raise RequiredArgumentMissingError(
            'Please specify --headnode-size explicitly.')
    if not workernode_size:
        raise RequiredArgumentMissingError(
            'Please specify --workernode-size explicitly.')

    # One OS profile is shared by every role that takes one
    os_profile = OsProfile(
        linux_operating_system_profile=LinuxOperatingSystemProfile(
            username=ssh_username,
            password=ssh_password,
            ssh_profile=ssh_public_key and SshProfile(
                public_keys=[SshPublicKey(certificate_data=ssh_public_key)])))

    roles = [
        # Required roles
        Role(name="headnode",
             target_instance_count=2,
             hardware_profile=HardwareProfile(vm_size=headnode_size),
             os_profile=os_profile,
             virtual_network_profile=virtual_network_profile),
        Role(name="workernode",
             target_instance_count=workernode_count,
             hardware_profile=HardwareProfile(vm_size=workernode_size),
             os_profile=os_profile,
             virtual_network_profile=virtual_network_profile,
             data_disks_groups=workernode_data_disk_groups,
             autoscale_configuration=autoscale_configuration)
    ]
    # Optional roles are added only when requested
    if zookeepernode_size:
        roles.append(
            Role(name="zookeepernode",
                 target_instance_count=3,
                 hardware_profile=HardwareProfile(vm_size=zookeepernode_size),
                 os_profile=os_profile,
                 virtual_network_profile=virtual_network_profile))
    if edgenode_size:
        roles.append(
            Role(name="edgenode",
                 target_instance_count=1,
                 hardware_profile=HardwareProfile(vm_size=edgenode_size),
                 os_profile=os_profile,
                 virtual_network_profile=virtual_network_profile))
    if kafka_management_node_size:
        # generate kafkaRestProperties
        roles.append(
            Role(name="kafkamanagementnode",
                 target_instance_count=kafka_management_node_count,
                 hardware_profile=HardwareProfile(
                     vm_size=kafka_management_node_size),
                 os_profile=os_profile,
                 virtual_network_profile=virtual_network_profile))
    if esp and idbroker:
        roles.append(
            Role(name="idbrokernode",
                 target_instance_count=2,
                 virtual_network_profile=virtual_network_profile))

    storage_accounts = []
    if storage_account:
        # Specify storage account details only when storage arguments are provided
        storage_accounts.append(
            StorageAccount(name=storage_account_endpoint,
                           key=storage_account_key,
                           container=storage_default_container,
                           file_system=storage_default_filesystem,
                           resource_id=storage_account,
                           msi_resource_id=storage_account_managed_identity,
                           is_default=True))
    additional_storage_accounts = []  # TODO: Add support for additional storage accounts
    if additional_storage_accounts:
        storage_accounts += [
            StorageAccount(name=s.storage_account_endpoint,
                           key=s.storage_account_key,
                           container=s.container,
                           is_default=False)
            for s in additional_storage_accounts
        ]

    # Both the explicitly assigned identity and the storage identity are attached to the cluster
    assign_identities = []
    if assign_identity:
        assign_identities.append(assign_identity)

    if storage_account_managed_identity:
        assign_identities.append(storage_account_managed_identity)

    cluster_identity = build_identities_info(
        assign_identities) if assign_identities else None

    domain_name = domain and parse_domain_name(domain)
    if not ldaps_urls and domain_name:
        ldaps_urls = ['ldaps://{}:636'.format(domain_name)]

    security_profile = domain and SecurityProfile(
        directory_type=DirectoryType.active_directory,
        domain=domain_name,
        ldaps_urls=ldaps_urls,
        domain_username=cluster_admin_account,
        domain_user_password=cluster_admin_password,
        cluster_users_group_dns=cluster_users_group_dns,
        aadds_resource_id=domain,
        msi_resource_id=assign_identity)

    disk_encryption_properties = encryption_vault_uri and DiskEncryptionProperties(
        vault_uri=encryption_vault_uri,
        key_name=encryption_key_name,
        key_version=encryption_key_version,
        encryption_algorithm=encryption_algorithm,
        msi_resource_id=assign_identity)

    # Encryption at host is merged into the key-vault settings when both are requested
    if encryption_at_host:
        if disk_encryption_properties:
            disk_encryption_properties.encryption_at_host = encryption_at_host
        else:
            disk_encryption_properties = DiskEncryptionProperties(
                encryption_at_host=encryption_at_host)

    kafka_rest_properties = (
        kafka_client_group_id and kafka_client_group_name
    ) and KafkaRestProperties(client_group_info=ClientGroupInfo(
        group_id=kafka_client_group_id, group_name=kafka_client_group_name))

    encryption_in_transit_properties = encryption_in_transit and EncryptionInTransitProperties(
        is_encryption_in_transit_enabled=encryption_in_transit)

    # relay outbound and private link
    network_properties = (
        resource_provider_connection
        or enable_private_link) and NetworkProperties(
            resource_provider_connection=resource_provider_connection,
            private_link=PrivateLink.enabled
            if enable_private_link is True else PrivateLink.disabled)

    # compute isolation
    compute_isolation_properties = enable_compute_isolation and ComputeIsolationProperties(
        enable_compute_isolation=enable_compute_isolation, host_sku=host_sku)

    create_params = ClusterCreateParametersExtended(
        location=location,
        tags=tags,
        properties=ClusterCreateProperties(
            cluster_version=cluster_version,
            os_type=OSType.linux,
            tier=cluster_tier,
            cluster_definition=ClusterDefinition(
                kind=cluster_type,
                configurations=cluster_configurations,
                component_version=component_version),
            compute_profile=ComputeProfile(roles=roles),
            storage_profile=StorageProfile(storageaccounts=storage_accounts),
            security_profile=security_profile,
            disk_encryption_properties=disk_encryption_properties,
            kafka_rest_properties=kafka_rest_properties,
            min_supported_tls_version=minimal_tls_version,
            encryption_in_transit_properties=encryption_in_transit_properties,
            network_properties=network_properties,
            compute_isolation_properties=compute_isolation_properties),
        identity=cluster_identity)

    if no_wait:
        return sdk_no_wait(no_wait, client.create, resource_group_name,
                           cluster_name, create_params)

    return client.create(resource_group_name, cluster_name, create_params)
def create_cluster(cmd, client, cluster_name, resource_group_name, location=None, tags=None,
                   no_wait=False, cluster_version='default', cluster_type='spark', cluster_tier=None,
                   cluster_configurations=None, component_version=None,
                   headnode_size='large', workernode_size='large', zookeepernode_size=None, edgenode_size=None,
                   workernode_count=3, workernode_data_disks_per_node=None,
                   workernode_data_disk_storage_account_type=None, workernode_data_disk_size=None,
                   http_username=None, http_password=None,
                   ssh_username='******', ssh_password=None, ssh_public_key=None,
                   storage_account=None, storage_account_key=None,
                   storage_default_container=None, storage_default_filesystem=None,
                   virtual_network=None, subnet_name=None):
    """Assemble an HDInsight cluster creation request from the command arguments and submit it.

    The function validates argument combinations, fills in defaults (location from the
    resource group, HTTP user name, SSH password from the HTTP password, default WASB container
    from the cluster name), builds the role and storage models and calls ``client.create``.

    :param cmd: Command context; ``cmd.cli_ctx`` is used for the resource-group location lookup
        and ``cmd`` itself is handed to the storage key helper.
    :param client: Object exposing ``create(resource_group_name, cluster_name, parameters)``.
    :param cluster_configurations: JSON string with extra cluster configurations. Its ``gateway``
        section is completed with the HTTP credentials.
    :param component_version: Iterable of ``component=version`` strings.
    :param http_username: HTTP user. The command-line value is used when given; otherwise
        ``restAuthCredential.username`` from the gateway configuration; otherwise the built-in default.
    :param http_password: HTTP password; may instead be supplied as
        ``restAuthCredential.password`` in the gateway configuration, but not in both places.
    :param ssh_public_key: SSH public key; sent as an SSH profile.
    :param virtual_network: Must be given together with ``subnet_name``.
    :param no_wait: When true, the call is dispatched through ``sdk_no_wait``.
    :return: The value returned by ``sdk_no_wait`` (``no_wait`` set) or by ``client.create``.
    :raises CLIError: On invalid JSON, conflicting or missing HTTP credentials, or incomplete
        storage, network or data-disk arguments.
    """
    from azure.mgmt.hdinsight.models import ClusterCreateParametersExtended, ClusterCreateProperties, OSType, \
        ClusterDefinition, ComputeProfile, HardwareProfile, Role, OsProfile, LinuxOperatingSystemProfile, \
        StorageProfile, StorageAccount, VirtualNetworkProfile, DataDisksGroups, SshProfile, SshPublicKey

    # Update optional parameters with defaults
    additional_storage_accounts = []  # TODO: Add support for additional storage accounts
    location = location or _get_rg_location(cmd.cli_ctx, resource_group_name)

    # Format dictionary/free-form arguments
    if cluster_configurations:
        import json
        try:
            cluster_configurations = json.loads(cluster_configurations)
        except ValueError as ex:
            raise CLIError(
                'The cluster_configurations argument must be valid JSON. Error: {}'
                .format(str(ex)))
    else:
        cluster_configurations = dict()
    if component_version:
        # See validator
        component_version = {
            c: v
            for c, v in
            [version.split('=') for version in component_version]
        }

    # Validate whether HTTP credentials were provided
    if 'gateway' in cluster_configurations:
        gateway_config = cluster_configurations['gateway']
    else:
        gateway_config = dict()
    if http_username and 'restAuthCredential.username' in gateway_config:
        raise CLIError(
            'An HTTP username must be specified either as a command-line parameter '
            'or in the cluster configuration, but not both.')
    if not http_username:
        # Fall back to the configured user name, then to the built-in default. The former
        # unconditional ``else`` branch discarded a user name passed on the command line.
        http_username = gateway_config.get('restAuthCredential.username') or '******'
    is_password_in_cluster_config = 'restAuthCredential.password' in gateway_config
    if http_password and is_password_in_cluster_config:
        raise CLIError(
            'An HTTP password must be specified either as a command-line parameter '
            'or in the cluster configuration, but not both.')
    if not (http_password or is_password_in_cluster_config):
        raise CLIError('An HTTP password is required.')

    # Update the cluster config with the HTTP credentials
    gateway_config['restAuthCredential.isEnabled'] = 'true'  # HTTP credentials are required
    gateway_config['restAuthCredential.username'] = http_username
    http_password = http_password or gateway_config['restAuthCredential.password']
    gateway_config['restAuthCredential.password'] = http_password
    cluster_configurations['gateway'] = gateway_config

    # Validate whether SSH credentials were provided
    if not (ssh_password or ssh_public_key):
        logger.warning(
            "SSH credentials not specified. Using the HTTP password as the SSH password."
        )
        ssh_password = http_password

    # Validate storage arguments from the user
    if storage_default_container and storage_default_filesystem:
        raise CLIError(
            'Either the default container or the default filesystem can be specified, but not both.'
        )

    # Attempt to infer the storage account key from the endpoint
    if not storage_account_key and storage_account:
        from .util import get_key_for_storage_account
        logger.info(
            'Storage account key not specified. Attempting to retrieve key...')
        key = get_key_for_storage_account(cmd, storage_account,
                                          resource_group_name)
        if not key:
            # Best effort only: the all-or-none validation below reports the missing key
            logger.warning(
                'Storage account key could not be inferred from storage account.'
            )
        else:
            storage_account_key = key

    # Attempt to provide a default container for WASB storage accounts
    if not storage_default_container and storage_account and _is_wasb_endpoint(
            storage_account):
        storage_default_container = cluster_name
        logger.warning('Default WASB container not specified, using "%s".',
                       storage_default_container)

    # Validate storage info parameters
    if not _all_or_none(
            storage_account, storage_account_key,
            (storage_default_container or storage_default_filesystem)):
        raise CLIError(
            'If storage details are specified, the storage account, storage account key, '
            'and either the default container or default filesystem must be specified.'
        )

    # Validate network profile parameters
    if not _all_or_none(virtual_network, subnet_name):
        raise CLIError(
            'Either both the virtual network and subnet should be specified, or neither should be.'
        )

    # Specify virtual network profile only when network arguments are provided
    virtual_network_profile = virtual_network and VirtualNetworkProfile(
        id=virtual_network, subnet=subnet_name)

    # Validate data disk parameters
    if not workernode_data_disks_per_node and workernode_data_disk_storage_account_type:
        raise CLIError(
            "Cannot define data disk storage account type unless disks per node is defined."
        )
    if not workernode_data_disks_per_node and workernode_data_disk_size:
        raise CLIError(
            "Cannot define data disk size unless disks per node is defined.")
    # Specify data disk groups only when disk arguments are provided
    workernode_data_disk_groups = workernode_data_disks_per_node and [
        DataDisksGroups(
            disks_per_node=workernode_data_disks_per_node,
            storage_account_type=workernode_data_disk_storage_account_type,
            disk_size_gb=workernode_data_disk_size)
    ]

    # The public key has to travel inside an SshProfile, matching the other cluster creation
    # commands in this file; a bare ``ssh_public_key`` keyword is not the form used elsewhere.
    os_profile = OsProfile(
        linux_operating_system_profile=LinuxOperatingSystemProfile(
            username=ssh_username,
            password=ssh_password,
            ssh_profile=ssh_public_key and SshProfile(
                public_keys=[SshPublicKey(certificate_data=ssh_public_key)])))

    roles = [
        # Required roles
        Role(name="headnode",
             target_instance_count=2,
             hardware_profile=HardwareProfile(vm_size=headnode_size),
             os_profile=os_profile,
             virtual_network_profile=virtual_network_profile),
        Role(name="workernode",
             target_instance_count=workernode_count,
             hardware_profile=HardwareProfile(vm_size=workernode_size),
             os_profile=os_profile,
             virtual_network_profile=virtual_network_profile,
             data_disks_groups=workernode_data_disk_groups)
    ]
    # Optional roles are added only when a size was requested for them
    if zookeepernode_size:
        roles.append(
            Role(name="zookeepernode",
                 target_instance_count=3,
                 hardware_profile=HardwareProfile(vm_size=zookeepernode_size),
                 os_profile=os_profile,
                 virtual_network_profile=virtual_network_profile))
    if edgenode_size:
        roles.append(
            Role(name="edgenode",
                 target_instance_count=1,
                 hardware_profile=HardwareProfile(vm_size=edgenode_size),
                 os_profile=os_profile,
                 virtual_network_profile=virtual_network_profile))

    storage_accounts = []
    if storage_account:
        # Specify storage account details only when storage arguments are provided
        storage_accounts.append(
            StorageAccount(name=storage_account,
                           key=storage_account_key,
                           container=storage_default_container,
                           file_system=storage_default_filesystem,
                           is_default=True))
    if additional_storage_accounts:
        storage_accounts += [
            StorageAccount(name=s.storage_account,
                           key=s.storage_account_key,
                           container=s.container,
                           is_default=False)
            for s in additional_storage_accounts
        ]

    create_params = ClusterCreateParametersExtended(
        location=location,
        tags=tags,
        properties=ClusterCreateProperties(
            cluster_version=cluster_version,
            os_type=OSType.linux,
            tier=cluster_tier,
            cluster_definition=ClusterDefinition(
                kind=cluster_type,
                configurations=cluster_configurations,
                component_version=component_version),
            compute_profile=ComputeProfile(roles=roles),
            storage_profile=StorageProfile(storageaccounts=storage_accounts)))

    if no_wait:
        return sdk_no_wait(no_wait, client.create, resource_group_name,
                           cluster_name, create_params)

    return client.create(resource_group_name, cluster_name, create_params)
def create_hdi_application(cmd, client, resource_group_name, cluster_name, application_name,
                           script_uri, script_action_name, script_parameters=None, edgenode_size='Standard_D3_V2',
                           ssh_username='******', ssh_password=None, ssh_public_key=None,
                           marketplace_identifier=None, application_type='CustomApplication', tags=None,
                           https_endpoint_access_mode='WebPage', https_endpoint_destination_port=8080,
                           sub_domain_suffix=None, disable_gateway_auth=None,
                           vnet_name=None, subnet=None, no_validation_timeout=False):
    """Submit an application create request for an HDInsight cluster.

    Builds a single "edgenode" role, one install script action targeting that
    role, and (only when ``sub_domain_suffix`` is given) one HTTPS endpoint,
    then hands the resulting ``Application`` to ``client.begin_create`` and
    returns whatever that call returns.

    ``cmd``, ``vnet_name`` and ``no_validation_timeout`` are accepted but not
    read in this function body.
    """
    from .util import build_virtual_network_profile
    from azure.mgmt.hdinsight.models import (
        Application, ApplicationGetHttpsEndpoint, ApplicationProperties, ComputeProfile,
        HardwareProfile, LinuxOperatingSystemProfile, OsProfile, Role, RuntimeScriptAction,
        SshProfile, SshPublicKey)

    # A virtual network profile is only built when a subnet is supplied;
    # otherwise the (falsy) subnet value itself is forwarded unchanged.
    if subnet:
        network_profile = build_virtual_network_profile(subnet)
    else:
        network_profile = subnet

    # An OS profile is only attached when an SSH password or public key is given;
    # otherwise the falsy credential value is forwarded unchanged.
    ssh_credential = ssh_password or ssh_public_key
    if ssh_credential:
        if ssh_public_key:
            key_profile = SshProfile(
                public_keys=[SshPublicKey(certificate_data=ssh_public_key)])
        else:
            key_profile = ssh_public_key
        linux_profile = LinuxOperatingSystemProfile(
            username=ssh_username,
            password=ssh_password,
            ssh_profile=key_profile)
        os_profile = OsProfile(linux_operating_system_profile=linux_profile)
    else:
        os_profile = ssh_credential

    # The application runs on exactly one edge node.
    edge_role = Role(
        name="edgenode",
        target_instance_count=1,
        hardware_profile=HardwareProfile(vm_size=edgenode_size),
        os_profile=os_profile,
        virtual_network_profile=network_profile)
    roles = [edge_role]

    # Expose an HTTPS endpoint only when a sub-domain suffix was requested.
    https_endpoints = [
        ApplicationGetHttpsEndpoint(
            access_modes=[https_endpoint_access_mode],
            destination_port=https_endpoint_destination_port,
            sub_domain_suffix=sub_domain_suffix,
            disable_gateway_auth=disable_gateway_auth)
    ] if sub_domain_suffix else []

    compute_profile = ComputeProfile(roles=roles)
    # The install script is run on every role defined above.
    install_action = RuntimeScriptAction(
        name=script_action_name,
        uri=script_uri,
        parameters=script_parameters,
        roles=[node.name for node in roles])

    application_properties = ApplicationProperties(
        compute_profile=compute_profile,
        install_script_actions=[install_action],
        https_endpoints=https_endpoints,
        application_type=application_type,
        marketplace_identifier=marketplace_identifier,
    )

    return client.begin_create(
        resource_group_name,
        cluster_name,
        application_name,
        Application(tags=tags, properties=application_properties))
cluster_definition=ClusterDefinition( kind="spark", configurations={ "gateway": { "restAuthCredential.enabled_credential": "True", "restAuthCredential.username": "******", "restAuthCredential.password": "******" } }), compute_profile=ComputeProfile(roles=[ Role(name="headnode", target_instance_count=2, hardware_profile=HardwareProfile(vm_size="Large"), os_profile=OsProfile( linux_operating_system_profile=LinuxOperatingSystemProfile( username="******", password="******"))), Role(name="workernode", target_instance_count=1, hardware_profile=HardwareProfile(vm_size="Large"), os_profile=OsProfile( linux_operating_system_profile=LinuxOperatingSystemProfile( username="******", password="******"))) ]), storage_profile=StorageProfile(storageaccounts=[ StorageAccount(name="storage_account", key="storage_account_key", container="container", is_default=True) ])) dag = DAG(dag_id='azure_hdinsinght_test_dag', schedule_interval=timedelta(1),
def start(self):
    """
    Make the cluster operational in DSS, creating an actual cluster if necessary.

    Builds the HDInsight creation parameters from the instance attributes,
    submits them through ``self.hdi_client.clusters.create`` and waits on the
    returned poller, then asks ``dku_hdi.make_cluster_keys_and_data`` for the
    DSS-side configuration. If waiting for creation or attaching fails, a
    best-effort delete of the cluster is attempted and the original exception
    is re-raised.

    :returns: the value produced by ``dku_hdi.make_cluster_keys_and_data``;
              expected to be a tuple of :
              * the settings needed to access hadoop/hive/impala/spark on the cluster. If not
                specified, then the corresponding element (hadoop/hive/impala/spark) is not overridden
              * a dict of data to pass to other methods when handling the cluster created
    :raises: whatever exception made cluster creation or attachment fail.
    """
    def _node_role(name, instance_count, vm_size):
        # Every role uses the same SSH credentials and virtual network profile.
        return Role(
            name=name,
            target_instance_count=instance_count,
            hardware_profile=HardwareProfile(vm_size=vm_size),
            os_profile=OsProfile(
                linux_operating_system_profile=LinuxOperatingSystemProfile(
                    username=self.ssh_username,
                    password=self.ssh_password)),
            virtual_network_profile=self.vnet_profile)

    def _delete_cluster_best_effort(failure_message):
        # Cleanup is best-effort: a failed delete is logged with its traceback
        # but must not replace the error that triggered the cleanup. Only
        # Exception is swallowed, so an interrupt during cleanup still propagates.
        try:
            self.hdi_client.clusters.delete(self.resource_group_name,
                                            self.hdi_cluster_name)
        except Exception:
            logging.exception(failure_message)

    logging.info("Init cluster for HDI")

    create_params = ClusterCreateParametersExtended(
        location=self.location,
        tags={},
        properties=ClusterCreateProperties(
            # TODO: parametrize this correctly
            cluster_version="3.6",
            os_type=OSType.linux,
            tier=Tier.standard,
            cluster_definition=ClusterDefinition(
                kind="spark",
                configurations={
                    "gateway": {
                        "restAuthCredential.enabled_credential": "True",
                        "restAuthCredential.username": self.gateway_username,
                        "restAuthCredential.password": self.gateway_password
                    }
                }),
            compute_profile=ComputeProfile(roles=[
                _node_role("headnode", 2, self.headnode_size),
                _node_role("workernode", self.worker_count, self.worker_size)
            ]),
            storage_profile=StorageProfile(storageaccounts=[
                StorageAccount(name=self.storage_account_name,
                               key=self.storage_account_key,
                               container=self.storage_account_container,
                               is_default=True)
            ])))

    logging.info('Creating Cluster ....')
    create_poller = self.hdi_client.clusters.create(
        self.resource_group_name, self.hdi_cluster_name, create_params)

    logging.info('Waiting for result poller...')
    try:
        cluster = create_poller.result()
    except BaseException:
        # Deliberately broad (same reach as the former bare except): partially
        # provisioned resources are removed even on interruption, and the
        # original error is always re-raised.
        logging.exception('Cluster creation failed, deleting what was provisioned')
        _delete_cluster_best_effort('Could not delete provisioned resources')
        raise

    logging.info('Poller resturned %s', pformat(cluster))

    try:
        dss_cluster_config = dku_hdi.make_cluster_keys_and_data(
            self.aad_client_credentials, self.subscription_id,
            self.hdi_cluster_name, self.resource_group_name)
    except BaseException:
        # A cluster DSS cannot attach to is of no use; remove it before
        # propagating the failure.
        logging.exception('Could not attach to created cluster, deleting')
        _delete_cluster_best_effort('Could not delete created cluster')
        raise

    return dss_cluster_config