# Module-level dependencies assumed by this function (present in the azure-cli
# module this snippet comes from, but not shown in the original excerpt).
from knack.log import get_logger
from knack.util import CLIError
from azure.cli.core.util import sdk_no_wait

logger = get_logger(__name__)


def create_cluster(cmd, client, cluster_name, resource_group_name, location=None, tags=None, no_wait=False,
                   cluster_version='default', cluster_type='spark', cluster_tier=None,
                   cluster_configurations=None, component_version=None,
                   headnode_size='large', workernode_size='large', zookeepernode_size=None, edgenode_size=None,
                   workernode_count=3, workernode_data_disks_per_node=None,
                   workernode_data_disk_storage_account_type=None, workernode_data_disk_size=None,
                   http_username=None, http_password=None,
                   ssh_username='******', ssh_password=None, ssh_public_key=None,
                   storage_account=None, storage_account_key=None,
                   storage_default_container=None, storage_default_filesystem=None,
                   virtual_network=None, subnet_name=None):
    from azure.mgmt.hdinsight.models import ClusterCreateParametersExtended, ClusterCreateProperties, OSType, \
        ClusterDefinition, ComputeProfile, HardwareProfile, Role, OsProfile, LinuxOperatingSystemProfile, \
        StorageProfile, StorageAccount, VirtualNetworkProfile, DataDisksGroups

    # Update optional parameters with defaults
    additional_storage_accounts = []  # TODO: Add support for additional storage accounts
    location = location or _get_rg_location(cmd.cli_ctx, resource_group_name)

    # Format dictionary/free-form arguments
    if cluster_configurations:
        import json
        try:
            cluster_configurations = json.loads(cluster_configurations)
        except ValueError as ex:
            raise CLIError('The cluster_configurations argument must be valid JSON. Error: {}'.format(str(ex)))
    else:
        cluster_configurations = dict()
    if component_version:
        # See validator
        component_version = {c: v for c, v in [version.split('=') for version in component_version]}

    # Validate whether HTTP credentials were provided
    if 'gateway' in cluster_configurations:
        gateway_config = cluster_configurations['gateway']
    else:
        gateway_config = dict()
    if http_username and 'restAuthCredential.username' in gateway_config:
        raise CLIError('An HTTP username must be specified either as a command-line parameter '
                       'or in the cluster configuration, but not both.')
    else:
        http_username = '******'  # Implement default logic here, in case a user specifies the username in configurations

    is_password_in_cluster_config = 'restAuthCredential.password' in gateway_config
    if http_password and is_password_in_cluster_config:
        raise CLIError('An HTTP password must be specified either as a command-line parameter '
                       'or in the cluster configuration, but not both.')
    if not (http_password or is_password_in_cluster_config):
        raise CLIError('An HTTP password is required.')

    # Update the cluster config with the HTTP credentials
    gateway_config['restAuthCredential.isEnabled'] = 'true'  # HTTP credentials are required
    http_username = http_username or gateway_config['restAuthCredential.username']
    gateway_config['restAuthCredential.username'] = http_username
    http_password = http_password or gateway_config['restAuthCredential.password']
    gateway_config['restAuthCredential.password'] = http_password
    cluster_configurations['gateway'] = gateway_config

    # Validate whether SSH credentials were provided
    if not (ssh_password or ssh_public_key):
        logger.warning("SSH credentials not specified. Using the HTTP password as the SSH password.")
        ssh_password = http_password

    # Validate storage arguments from the user
    if storage_default_container and storage_default_filesystem:
        raise CLIError('Either the default container or the default filesystem can be specified, but not both.')

    # Attempt to infer the storage account key from the endpoint
    if not storage_account_key and storage_account:
        from .util import get_key_for_storage_account
        logger.info('Storage account key not specified. Attempting to retrieve key...')
        key = get_key_for_storage_account(cmd, storage_account, resource_group_name)
        if not key:
            logger.warning('Storage account key could not be inferred from storage account.')
        else:
            storage_account_key = key

    # Attempt to provide a default container for WASB storage accounts
    if not storage_default_container and storage_account and _is_wasb_endpoint(storage_account):
        storage_default_container = cluster_name
        logger.warning('Default WASB container not specified, using "%s".', storage_default_container)

    # Validate storage info parameters
    if not _all_or_none(storage_account, storage_account_key,
                        (storage_default_container or storage_default_filesystem)):
        raise CLIError('If storage details are specified, the storage account, storage account key, '
                       'and either the default container or default filesystem must be specified.')

    # Validate network profile parameters
    if not _all_or_none(virtual_network, subnet_name):
        raise CLIError('Either both the virtual network and subnet should be specified, or neither should be.')

    # Specify virtual network profile only when network arguments are provided
    virtual_network_profile = virtual_network and VirtualNetworkProfile(
        id=virtual_network,
        subnet=subnet_name)

    # Validate data disk parameters
    if not workernode_data_disks_per_node and workernode_data_disk_storage_account_type:
        raise CLIError("Cannot define data disk storage account type unless disks per node is defined.")
    if not workernode_data_disks_per_node and workernode_data_disk_size:
        raise CLIError("Cannot define data disk size unless disks per node is defined.")

    # Specify data disk groups only when disk arguments are provided
    workernode_data_disk_groups = workernode_data_disks_per_node and [
        DataDisksGroups(
            disks_per_node=workernode_data_disks_per_node,
            storage_account_type=workernode_data_disk_storage_account_type,
            disk_size_gb=workernode_data_disk_size)
    ]

    os_profile = OsProfile(
        linux_operating_system_profile=LinuxOperatingSystemProfile(
            username=ssh_username,
            password=ssh_password,
            ssh_public_key=ssh_public_key))

    roles = [
        # Required roles
        Role(name="headnode",
             target_instance_count=2,
             hardware_profile=HardwareProfile(vm_size=headnode_size),
             os_profile=os_profile,
             virtual_network_profile=virtual_network_profile),
        Role(name="workernode",
             target_instance_count=workernode_count,
             hardware_profile=HardwareProfile(vm_size=workernode_size),
             os_profile=os_profile,
             virtual_network_profile=virtual_network_profile,
             data_disks_groups=workernode_data_disk_groups)
    ]
    if zookeepernode_size:
        roles.append(
            Role(name="zookeepernode",
                 target_instance_count=3,
                 hardware_profile=HardwareProfile(vm_size=zookeepernode_size),
                 os_profile=os_profile,
                 virtual_network_profile=virtual_network_profile))
    if edgenode_size:
        roles.append(
            Role(name="edgenode",
                 target_instance_count=1,
                 hardware_profile=HardwareProfile(vm_size=edgenode_size),
                 os_profile=os_profile,
                 virtual_network_profile=virtual_network_profile))

    storage_accounts = []
    if storage_account:
        # Specify storage account details only when storage arguments are provided
        storage_accounts.append(
            StorageAccount(name=storage_account,
                           key=storage_account_key,
                           container=storage_default_container,
                           file_system=storage_default_filesystem,
                           is_default=True))
    if additional_storage_accounts:
        storage_accounts += [
            StorageAccount(name=s.storage_account,
                           key=s.storage_account_key,
                           container=s.container,
                           is_default=False)
            for s in additional_storage_accounts
        ]

    create_params = ClusterCreateParametersExtended(
        location=location,
        tags=tags,
        properties=ClusterCreateProperties(
            cluster_version=cluster_version,
            os_type=OSType.linux,
            tier=cluster_tier,
            cluster_definition=ClusterDefinition(
                kind=cluster_type,
                configurations=cluster_configurations,
                component_version=component_version),
            compute_profile=ComputeProfile(roles=roles),
            storage_profile=StorageProfile(storageaccounts=storage_accounts)))

    if no_wait:
        return sdk_no_wait(no_wait, client.create, resource_group_name, cluster_name, create_params)

    return client.create(resource_group_name, cluster_name, create_params)
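# The module-level helpers called above (_get_rg_location, _all_or_none,
# _is_wasb_endpoint) are defined elsewhere in azure-cli and not shown in this
# snippet. The sketches below are inferred from the call sites and are
# assumptions, not the actual implementations.


def _all_or_none(*params):
    # True when the arguments are either all truthy or all falsy.
    return all(params) or not any(params)


def _is_wasb_endpoint(storage_account):
    # Heuristic: WASB (blob) endpoints in the public Azure cloud contain
    # ".blob.core.windows.net"; sovereign clouds use different suffixes.
    return '.blob.core' in storage_account


def _get_rg_location(cli_ctx, resource_group_name):
    # Hypothetical sketch: resolve the resource group and reuse its location.
    from azure.cli.core.commands.client_factory import get_mgmt_service_client
    from azure.cli.core.profiles import ResourceType
    client = get_mgmt_service_client(cli_ctx, ResourceType.MGMT_RESOURCE_RESOURCES)
    return client.resource_groups.get(resource_group_name).location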
# Fragment: keyword arguments to a ClusterCreateProperties(...) call; the
# leading fields are elided in the original. Note the gateway key is
# "restAuthCredential.isEnabled", matching create_cluster above.
tier=Tier.standard,
cluster_definition=ClusterDefinition(
    kind="spark",
    configurations={
        "gateway": {
            "restAuthCredential.isEnabled": "true",
            "restAuthCredential.username": "******",
            "restAuthCredential.password": "******"
        }
    }),
compute_profile=ComputeProfile(roles=[
    Role(name="headnode",
         target_instance_count=2,
         hardware_profile=HardwareProfile(vm_size="Large"),
         os_profile=OsProfile(
             linux_operating_system_profile=LinuxOperatingSystemProfile(
                 username="******",
                 password="******"))),
    Role(name="workernode",
         target_instance_count=1,
         hardware_profile=HardwareProfile(vm_size="Large"),
         os_profile=OsProfile(
             linux_operating_system_profile=LinuxOperatingSystemProfile(
                 username="******",
                 password="******")))
]),
storage_profile=StorageProfile(storageaccounts=[
    StorageAccount(name="storage_account",
                   key="storage_account_key",
                   container="container",
                   is_default=True)
]))
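# Inspection sketch (an assumption, not part of the original snippet): the SDK
# models above are msrest-based, so an assembled ClusterCreateParametersExtended
# (here assumed bound to `create_params`, as built in create_cluster above) can
# be rendered to its REST payload with serialize() for comparison against an
# expected fragment like the one above.
import json
print(json.dumps(create_params.serialize(), indent=2))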
def create_hdi_application(cmd, client, resource_group_name, cluster_name, application_name,
                           script_uri, script_action_name, script_parameters=None,
                           edgenode_size='Standard_D3_V2',
                           ssh_username='******', ssh_password=None, ssh_public_key=None,
                           marketplace_identifier=None, application_type='CustomApplication', tags=None,
                           https_endpoint_access_mode='WebPage', https_endpoint_destination_port=8080,
                           sub_domain_suffix=None, disable_gateway_auth=None,
                           vnet_name=None, subnet=None, no_validation_timeout=False):
    from .util import build_virtual_network_profile
    from azure.mgmt.hdinsight.models import Application, ApplicationProperties, ComputeProfile, RuntimeScriptAction, \
        Role, LinuxOperatingSystemProfile, HardwareProfile, \
        ApplicationGetHttpsEndpoint, OsProfile, SshProfile, SshPublicKey

    # Specify virtual network profile only when network arguments are provided
    virtual_network_profile = subnet and build_virtual_network_profile(subnet)

    os_profile = (ssh_password or ssh_public_key) and OsProfile(
        linux_operating_system_profile=LinuxOperatingSystemProfile(
            username=ssh_username,
            password=ssh_password,
            ssh_profile=ssh_public_key and SshProfile(
                public_keys=[SshPublicKey(certificate_data=ssh_public_key)])))

    roles = [
        Role(name="edgenode",
             target_instance_count=1,
             hardware_profile=HardwareProfile(vm_size=edgenode_size),
             os_profile=os_profile,
             virtual_network_profile=virtual_network_profile)
    ]

    # Build the HTTPS endpoint only when a subdomain suffix is provided
    https_endpoints = []
    if sub_domain_suffix:
        https_endpoints.append(
            ApplicationGetHttpsEndpoint(
                access_modes=[https_endpoint_access_mode],
                destination_port=https_endpoint_destination_port,
                sub_domain_suffix=sub_domain_suffix,
                disable_gateway_auth=disable_gateway_auth))

    application_properties = ApplicationProperties(
        compute_profile=ComputeProfile(roles=roles),
        install_script_actions=[
            RuntimeScriptAction(name=script_action_name,
                                uri=script_uri,
                                parameters=script_parameters,
                                roles=[role.name for role in roles])
        ],
        https_endpoints=https_endpoints,
        application_type=application_type,
        marketplace_identifier=marketplace_identifier)

    create_params = Application(tags=tags, properties=application_properties)

    return client.begin_create(resource_group_name, cluster_name, application_name, create_params)
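# Hedged sketch of the build_virtual_network_profile helper imported from .util
# above, inferred from its call site: given a subnet ARM resource ID, derive the
# parent virtual network ID and wrap both in a VirtualNetworkProfile. This is an
# assumption, not the actual azure-cli .util implementation.
def build_virtual_network_profile_sketch(subnet):
    from azure.mgmt.hdinsight.models import VirtualNetworkProfile
    # A subnet resource ID has the shape:
    #   /subscriptions/<sub>/resourceGroups/<rg>/providers/Microsoft.Network/
    #       virtualNetworks/<vnet>/subnets/<name>
    vnet_id = subnet.rsplit('/subnets/', 1)[0]
    return VirtualNetworkProfile(id=vnet_id, subnet=subnet)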
def start(self):
    """
    Make the cluster operational in DSS, creating an actual cluster if necessary.

    :returns: a tuple of:

        * the settings needed to access hadoop/hive/impala/spark on the cluster. If not
          specified, the corresponding element (hadoop/hive/impala/spark) is not overridden
        * a dict of data to pass to other methods when handling the created cluster
    """
    logging.info("Init cluster for HDI")
    create_params = ClusterCreateParametersExtended(
        location=self.location,
        tags={},
        properties=ClusterCreateProperties(
            # TODO: parametrize this correctly
            cluster_version="3.6",
            os_type=OSType.linux,
            tier=Tier.standard,
            cluster_definition=ClusterDefinition(
                kind="spark",
                configurations={
                    "gateway": {
                        # "restAuthCredential.isEnabled" is the key the HDInsight
                        # API expects (see create_cluster above)
                        "restAuthCredential.isEnabled": "true",
                        "restAuthCredential.username": self.gateway_username,
                        "restAuthCredential.password": self.gateway_password
                    }
                }),
            compute_profile=ComputeProfile(roles=[
                Role(name="headnode",
                     target_instance_count=2,
                     hardware_profile=HardwareProfile(vm_size=self.headnode_size),
                     os_profile=OsProfile(
                         linux_operating_system_profile=LinuxOperatingSystemProfile(
                             username=self.ssh_username,
                             password=self.ssh_password)),
                     virtual_network_profile=self.vnet_profile),
                Role(name="workernode",
                     target_instance_count=self.worker_count,
                     hardware_profile=HardwareProfile(vm_size=self.worker_size),
                     os_profile=OsProfile(
                         linux_operating_system_profile=LinuxOperatingSystemProfile(
                             username=self.ssh_username,
                             password=self.ssh_password)),
                     virtual_network_profile=self.vnet_profile)
            ]),
            storage_profile=StorageProfile(storageaccounts=[
                StorageAccount(name=self.storage_account_name,
                               key=self.storage_account_key,
                               container=self.storage_account_container,
                               is_default=True)
            ])))

    logging.info('Creating cluster...')
    create_poller = self.hdi_client.clusters.create(
        self.resource_group_name, self.hdi_cluster_name, create_params)
    logging.info('Waiting for result poller...')
    try:
        cluster = create_poller.result()
    except Exception:
        logging.error('Cluster creation failed, deleting what was provisioned')
        try:
            self.hdi_client.clusters.delete(self.resource_group_name,
                                            self.hdi_cluster_name)
        except Exception:
            logging.error('Could not delete provisioned resources')
        raise
    logging.info('Poller returned %s', pformat(cluster))

    try:
        dss_cluster_config = dku_hdi.make_cluster_keys_and_data(
            self.aad_client_credentials, self.subscription_id,
            self.hdi_cluster_name, self.resource_group_name)
    except Exception:
        logging.error('Could not attach to created cluster, deleting')
        try:
            self.hdi_client.clusters.delete(self.resource_group_name,
                                            self.hdi_cluster_name)
        except Exception:
            logging.error('Could not delete created cluster')
        raise

    return dss_cluster_config
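# The two failure paths in start() above repeat the same best-effort
# delete-and-log block. A small helper method (hypothetical, not part of the
# original plugin) could factor that pattern out:
def _delete_cluster_best_effort(self, reason):
    # Log why we are cleaning up, attempt the delete, and swallow delete
    # failures so the caller's `raise` re-raises the original exception.
    logging.error('%s, deleting cluster %s', reason, self.hdi_cluster_name)
    try:
        self.hdi_client.clusters.delete(self.resource_group_name,
                                        self.hdi_cluster_name)
    except Exception:
        logging.error('Could not delete provisioned resources')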