Exemple #1
0
def create_cluster(cmd,
                   client,
                   cluster_name,
                   resource_group_name,
                   location=None,
                   tags=None,
                   no_wait=False,
                   cluster_version='default',
                   cluster_type='spark',
                   cluster_tier=None,
                   cluster_configurations=None,
                   component_version=None,
                   headnode_size='large',
                   workernode_size='large',
                   zookeepernode_size=None,
                   edgenode_size=None,
                   workernode_count=3,
                   workernode_data_disks_per_node=None,
                   workernode_data_disk_storage_account_type=None,
                   workernode_data_disk_size=None,
                   http_username=None,
                   http_password=None,
                   ssh_username='******',
                   ssh_password=None,
                   ssh_public_key=None,
                   storage_account=None,
                   storage_account_key=None,
                   storage_default_container=None,
                   storage_default_filesystem=None,
                   virtual_network=None,
                   subnet_name=None):
    """Validate the CLI arguments and submit an HDInsight cluster creation.

    The arguments are merged into a ``ClusterCreateParametersExtended`` model
    which is handed to ``client.create``.

    :param cmd: CLI command object; ``cmd.cli_ctx`` is used to look up the
        resource group location and ``cmd`` is forwarded to the storage key
        lookup helper.
    :param client: operations client exposing
        ``create(resource_group_name, cluster_name, parameters)``.
    :param cluster_name: name of the cluster; also used as the default WASB
        container name when none is given.
    :param resource_group_name: resource group that will hold the cluster.
    :param location: cluster location; falls back to the resource group's
        location when falsy.
    :param tags: passed through unchanged to the create parameters.
    :param no_wait: when truthy the call is dispatched through
        ``sdk_no_wait``.
    :param cluster_version: forwarded as the cluster version.
    :param cluster_type: forwarded as the cluster definition ``kind``.
    :param cluster_tier: forwarded as the cluster ``tier``.
    :param cluster_configurations: JSON text describing a JSON object with
        extra cluster configurations; the ``gateway`` section may carry the
        HTTP credentials.
    :param component_version: iterable of ``component=version`` strings.
    :param headnode_size: VM size of the two head nodes.
    :param workernode_size: VM size of the worker nodes.
    :param zookeepernode_size: VM size of the three zookeeper nodes; the role
        is only added when this is truthy.
    :param edgenode_size: VM size of the single edge node; the role is only
        added when this is truthy.
    :param workernode_count: number of worker nodes.
    :param workernode_data_disks_per_node: data disks per worker node; needed
        before a disk size or storage account type may be given.
    :param workernode_data_disk_storage_account_type: storage account type of
        the worker data disks.
    :param workernode_data_disk_size: size of each worker data disk
        (forwarded as ``disk_size_gb``).
    :param http_username: gateway user name; may alternatively come from the
        ``gateway`` section of ``cluster_configurations``, but not from both.
    :param http_password: gateway password; same either/or rule as the user
        name, and one of the two sources is mandatory.
    :param ssh_username: SSH user name for every role.
    :param ssh_password: SSH password; when neither this nor
        ``ssh_public_key`` is given the HTTP password is reused.
    :param ssh_public_key: SSH public key for every role.
    :param storage_account: default storage account.
    :param storage_account_key: key of the storage account; looked up via
        ``get_key_for_storage_account`` when omitted.
    :param storage_default_container: default container (mutually exclusive
        with ``storage_default_filesystem``).
    :param storage_default_filesystem: default filesystem (mutually exclusive
        with ``storage_default_container``).
    :param virtual_network: virtual network id; requires ``subnet_name``.
    :param subnet_name: subnet; requires ``virtual_network``.
    :returns: the result of ``sdk_no_wait(...)`` when ``no_wait`` is truthy,
        otherwise the result of ``client.create(...)``.
    :raises CLIError: if ``cluster_configurations`` is not valid JSON or not a
        JSON object, if HTTP credentials are duplicated or the password is
        missing, or if the storage, network or data disk arguments are
        inconsistent.
    """
    from azure.mgmt.hdinsight.models import ClusterCreateParametersExtended, ClusterCreateProperties, OSType, \
        ClusterDefinition, ComputeProfile, HardwareProfile, Role, OsProfile, LinuxOperatingSystemProfile, \
        StorageProfile, StorageAccount, VirtualNetworkProfile, DataDisksGroups

    # Update optional parameters with defaults
    additional_storage_accounts = [
    ]  # TODO: Add support for additional storage accounts
    location = location or _get_rg_location(cmd.cli_ctx, resource_group_name)

    # Format dictionary/free-form arguments
    if cluster_configurations:
        import json
        try:
            cluster_configurations = json.loads(cluster_configurations)
        except ValueError as ex:
            raise CLIError(
                'The cluster_configurations argument must be valid JSON. Error: {}'
                .format(str(ex)))
        # Valid JSON may still be a list/string/number; the code below needs
        # a mapping, so fail with a readable error instead of a TypeError.
        if not isinstance(cluster_configurations, dict):
            raise CLIError(
                'The cluster_configurations argument must be a JSON object.')
    else:
        cluster_configurations = dict()
    if component_version:
        # See validator
        component_version = {
            c: v
            for c, v in (version.split('=') for version in component_version)
        }

    # Validate whether HTTP credentials were provided
    gateway_config = cluster_configurations.get('gateway', dict())
    if http_username and 'restAuthCredential.username' in gateway_config:
        raise CLIError(
            'An HTTP username must be specified either as a command-line parameter '
            'or in the cluster configuration, but not both.')
    if not http_username:
        # Fall back to the default only when neither the command line nor the
        # cluster configuration names a user; previously the default always
        # replaced whatever the caller had supplied.
        http_username = gateway_config.get(
            'restAuthCredential.username') or '******'
    is_password_in_cluster_config = 'restAuthCredential.password' in gateway_config
    if http_password and is_password_in_cluster_config:
        raise CLIError(
            'An HTTP password must be specified either as a command-line parameter '
            'or in the cluster configuration, but not both.')
    if not (http_password or is_password_in_cluster_config):
        raise CLIError('An HTTP password is required.')

    # Update the cluster config with the HTTP credentials
    gateway_config[
        'restAuthCredential.isEnabled'] = 'true'  # HTTP credentials are required
    gateway_config['restAuthCredential.username'] = http_username
    http_password = http_password or gateway_config[
        'restAuthCredential.password']
    gateway_config['restAuthCredential.password'] = http_password
    cluster_configurations['gateway'] = gateway_config

    # Validate whether SSH credentials were provided
    if not (ssh_password or ssh_public_key):
        logger.warning(
            "SSH credentials not specified. Using the HTTP password as the SSH password."
        )
        ssh_password = http_password

    # Validate storage arguments from the user
    if storage_default_container and storage_default_filesystem:
        raise CLIError(
            'Either the default container or the default filesystem can be specified, but not both.'
        )

    # Attempt to infer the storage account key from the endpoint
    if not storage_account_key and storage_account:
        from .util import get_key_for_storage_account
        logger.info(
            'Storage account key not specified. Attempting to retrieve key...')
        key = get_key_for_storage_account(cmd, storage_account,
                                          resource_group_name)
        if not key:
            logger.warning(
                'Storage account key could not be inferred from storage account.'
            )
        else:
            storage_account_key = key

    # Attempt to provide a default container for WASB storage accounts
    if not storage_default_container and storage_account and _is_wasb_endpoint(
            storage_account):
        storage_default_container = cluster_name
        logger.warning('Default WASB container not specified, using "%s".',
                       storage_default_container)

    # Validate storage info parameters
    if not _all_or_none(
            storage_account, storage_account_key,
        (storage_default_container or storage_default_filesystem)):
        raise CLIError(
            'If storage details are specified, the storage account, storage account key, '
            'and either the default container or default filesystem must be specified.'
        )

    # Validate network profile parameters
    if not _all_or_none(virtual_network, subnet_name):
        raise CLIError(
            'Either both the virtual network and subnet should be specified, or neither should be.'
        )
    # Specify virtual network profile only when network arguments are provided
    virtual_network_profile = virtual_network and VirtualNetworkProfile(
        id=virtual_network, subnet=subnet_name)

    # Validate data disk parameters
    if not workernode_data_disks_per_node and workernode_data_disk_storage_account_type:
        raise CLIError(
            "Cannot define data disk storage account type unless disks per node is defined."
        )
    if not workernode_data_disks_per_node and workernode_data_disk_size:
        raise CLIError(
            "Cannot define data disk size unless disks per node is defined.")
    # Specify data disk groups only when disk arguments are provided
    workernode_data_disk_groups = workernode_data_disks_per_node and [
        DataDisksGroups(
            disks_per_node=workernode_data_disks_per_node,
            storage_account_type=workernode_data_disk_storage_account_type,
            disk_size_gb=workernode_data_disk_size)
    ]

    # The same SSH profile is shared by every role of the cluster
    os_profile = OsProfile(
        linux_operating_system_profile=LinuxOperatingSystemProfile(
            username=ssh_username,
            password=ssh_password,
            ssh_public_key=ssh_public_key))

    roles = [
        # Required roles
        Role(name="headnode",
             target_instance_count=2,
             hardware_profile=HardwareProfile(vm_size=headnode_size),
             os_profile=os_profile,
             virtual_network_profile=virtual_network_profile),
        Role(name="workernode",
             target_instance_count=workernode_count,
             hardware_profile=HardwareProfile(vm_size=workernode_size),
             os_profile=os_profile,
             virtual_network_profile=virtual_network_profile,
             data_disks_groups=workernode_data_disk_groups)
    ]
    # Optional roles are only requested when a size was given for them
    if zookeepernode_size:
        roles.append(
            Role(name="zookeepernode",
                 target_instance_count=3,
                 hardware_profile=HardwareProfile(vm_size=zookeepernode_size),
                 os_profile=os_profile,
                 virtual_network_profile=virtual_network_profile))
    if edgenode_size:
        roles.append(
            Role(name="edgenode",
                 target_instance_count=1,
                 hardware_profile=HardwareProfile(vm_size=edgenode_size),
                 os_profile=os_profile,
                 virtual_network_profile=virtual_network_profile))

    storage_accounts = []
    if storage_account:
        # Specify storage account details only when storage arguments are provided
        storage_accounts.append(
            StorageAccount(name=storage_account,
                           key=storage_account_key,
                           container=storage_default_container,
                           file_system=storage_default_filesystem,
                           is_default=True))
    if additional_storage_accounts:
        storage_accounts += [
            StorageAccount(name=s.storage_account,
                           key=s.storage_account_key,
                           container=s.container,
                           is_default=False)
            for s in additional_storage_accounts
        ]

    create_params = ClusterCreateParametersExtended(
        location=location,
        tags=tags,
        properties=ClusterCreateProperties(
            cluster_version=cluster_version,
            os_type=OSType.linux,
            tier=cluster_tier,
            cluster_definition=ClusterDefinition(
                kind=cluster_type,
                configurations=cluster_configurations,
                component_version=component_version),
            compute_profile=ComputeProfile(roles=roles),
            storage_profile=StorageProfile(storageaccounts=storage_accounts)))

    if no_wait:
        return sdk_no_wait(no_wait, client.create, resource_group_name,
                           cluster_name, create_params)

    return client.create(resource_group_name, cluster_name, create_params)
    tier=Tier.standard,
    cluster_definition=ClusterDefinition(
        kind="spark",
        configurations={
            "gateway": {
                "restAuthCredential.enabled_credential": "True",
                "restAuthCredential.username": "******",
                "restAuthCredential.password": "******"
            }
        }),
    compute_profile=ComputeProfile(roles=[
        Role(name="headnode",
             target_instance_count=2,
             hardware_profile=HardwareProfile(vm_size="Large"),
             os_profile=OsProfile(
                 linux_operating_system_profile=LinuxOperatingSystemProfile(
                     username="******", password="******"))),
        Role(name="workernode",
             target_instance_count=1,
             hardware_profile=HardwareProfile(vm_size="Large"),
             os_profile=OsProfile(
                 linux_operating_system_profile=LinuxOperatingSystemProfile(
                     username="******", password="******")))
    ]),
    storage_profile=StorageProfile(storageaccounts=[
        StorageAccount(name="storage_account",
                       key="storage_account_key",
                       container="container",
                       is_default=True)
    ]))
Exemple #3
0
def create_hdi_application(cmd,
                           client,
                           resource_group_name,
                           cluster_name,
                           application_name,
                           script_uri,
                           script_action_name,
                           script_parameters=None,
                           edgenode_size='Standard_D3_V2',
                           ssh_username='******',
                           ssh_password=None,
                           ssh_public_key=None,
                           marketplace_identifier=None,
                           application_type='CustomApplication',
                           tags=None,
                           https_endpoint_access_mode='WebPage',
                           https_endpoint_destination_port=8080,
                           sub_domain_suffix=None,
                           disable_gateway_auth=None,
                           vnet_name=None,
                           subnet=None,
                           no_validation_timeout=False):
    """Build an HDInsight ``Application`` model and start its creation.

    The application consists of a single ``edgenode`` role plus one install
    script action that targets that role. An HTTPS endpoint is declared only
    when ``sub_domain_suffix`` is truthy. ``cmd``, ``vnet_name`` and
    ``no_validation_timeout`` are accepted but not read by this function.

    :returns: whatever ``client.begin_create`` returns for the request.
    """
    from .util import build_virtual_network_profile
    from azure.mgmt.hdinsight.models import (
        Application, ApplicationProperties, ComputeProfile,
        RuntimeScriptAction, Role, LinuxOperatingSystemProfile,
        HardwareProfile, ApplicationGetHttpsEndpoint, OsProfile, SshProfile,
        SshPublicKey)

    # Network profile: built only for a truthy subnet, otherwise the falsy
    # subnet value itself is passed along.
    network_profile = subnet
    if subnet:
        network_profile = build_virtual_network_profile(subnet)

    # OS profile: built only when some SSH credential was supplied, otherwise
    # the falsy credential value itself is passed along.
    ssh_credential = ssh_password or ssh_public_key
    if ssh_credential:
        key_profile = ssh_public_key
        if ssh_public_key:
            key_profile = SshProfile(
                public_keys=[SshPublicKey(certificate_data=ssh_public_key)])
        linux_profile = LinuxOperatingSystemProfile(username=ssh_username,
                                                    password=ssh_password,
                                                    ssh_profile=key_profile)
        edge_os_profile = OsProfile(
            linux_operating_system_profile=linux_profile)
    else:
        edge_os_profile = ssh_credential

    edge_role = Role(name="edgenode",
                     target_instance_count=1,
                     hardware_profile=HardwareProfile(vm_size=edgenode_size),
                     os_profile=edge_os_profile,
                     virtual_network_profile=network_profile)
    roles = [edge_role]

    # Expose an HTTPS endpoint only when a sub-domain suffix was requested
    if sub_domain_suffix:
        https_endpoints = [
            ApplicationGetHttpsEndpoint(
                access_modes=[https_endpoint_access_mode],
                destination_port=https_endpoint_destination_port,
                sub_domain_suffix=sub_domain_suffix,
                disable_gateway_auth=disable_gateway_auth)
        ]
    else:
        https_endpoints = []

    compute_profile = ComputeProfile(roles=roles)
    install_action = RuntimeScriptAction(
        name=script_action_name,
        uri=script_uri,
        parameters=script_parameters,
        roles=[each_role.name for each_role in roles])

    create_params = Application(
        tags=tags,
        properties=ApplicationProperties(
            compute_profile=compute_profile,
            install_script_actions=[install_action],
            https_endpoints=https_endpoints,
            application_type=application_type,
            marketplace_identifier=marketplace_identifier,
        ))

    return client.begin_create(resource_group_name, cluster_name,
                               application_name, create_params)
Exemple #4
0
    def start(self):
        """
        Make the cluster operational in DSS, creating an actual cluster if necessary.

        An HDInsight cluster creation request is submitted through
        ``self.hdi_client.clusters.create`` and its poller is awaited; then
        ``dku_hdi.make_cluster_keys_and_data`` is asked for the DSS-side
        configuration. If either step fails, a delete request for the cluster
        is issued on a best-effort basis and the original error is re-raised.

        :returns: the value produced by ``dku_hdi.make_cluster_keys_and_data``,
                  documented as a tuple of:
                  * the settings needed to access hadoop/hive/impala/spark on the cluster. If not
                    specified, then the corresponding element (hadoop/hive/impala/spark) is not overridden
                  * a dict of data to pass to other methods when handling the cluster created
        """
        logging.info("Init cluster for HDI")

        def delete_cluster(failure_message):
            # Best-effort cleanup. It lives in its own function so that the
            # caller's bare ``raise`` re-raises the original failure, and a
            # failed delete is logged with its cause instead of silently lost.
            try:
                self.hdi_client.clusters.delete(self.resource_group_name,
                                                self.hdi_cluster_name)
            except Exception:
                logging.exception(failure_message)

        def linux_os_profile():
            # Every role logs in with the same SSH credentials.
            return OsProfile(
                linux_operating_system_profile=LinuxOperatingSystemProfile(
                    username=self.ssh_username, password=self.ssh_password))

        roles = [
            Role(name="headnode",
                 target_instance_count=2,
                 hardware_profile=HardwareProfile(vm_size=self.headnode_size),
                 os_profile=linux_os_profile(),
                 virtual_network_profile=self.vnet_profile),
            Role(name="workernode",
                 target_instance_count=self.worker_count,
                 hardware_profile=HardwareProfile(vm_size=self.worker_size),
                 os_profile=linux_os_profile(),
                 virtual_network_profile=self.vnet_profile)
        ]

        create_params = ClusterCreateParametersExtended(
            location=self.location,
            tags={},
            properties=ClusterCreateProperties(
                #TODO: parametrize this correctly
                cluster_version="3.6",
                os_type=OSType.linux,
                tier=Tier.standard,
                cluster_definition=ClusterDefinition(
                    kind="spark",
                    configurations={
                        "gateway": {
                            "restAuthCredential.enabled_credential": "True",
                            "restAuthCredential.username":
                            self.gateway_username,
                            "restAuthCredential.password":
                            self.gateway_password
                        }
                    }),
                compute_profile=ComputeProfile(roles=roles),
                storage_profile=StorageProfile(storageaccounts=[
                    StorageAccount(name=self.storage_account_name,
                                   key=self.storage_account_key,
                                   container=self.storage_account_container,
                                   is_default=True)
                ])))

        logging.info('Creating Cluster ....')
        create_poller = self.hdi_client.clusters.create(
            self.resource_group_name, self.hdi_cluster_name, create_params)
        logging.info('Waiting for result poller...')
        try:
            cluster = create_poller.result()
        except BaseException:
            # BaseException on purpose: clean up on interruption as well.
            # The error is always re-raised below.
            logging.exception(
                'Cluster creation failed, deleting what was provisioned')
            delete_cluster('Could not delete provisioned resources')
            raise

        logging.info('Poller resturned %s', pformat(cluster))

        try:
            dss_cluster_config = dku_hdi.make_cluster_keys_and_data(
                self.aad_client_credentials, self.subscription_id,
                self.hdi_cluster_name, self.resource_group_name)
        except BaseException:
            # Same policy as above: clean up, then surface the original error.
            logging.exception('Could not attach to created cluster, deleting')
            delete_cluster('Could not delete created cluster')
            raise

        return dss_cluster_config