Beispiel #1
0
def build_virtual_network_profile(subnet):
    """Build a ``VirtualNetworkProfile`` from a subnet resource ID.

    The ID of the enclosing resource is rebuilt from the subscription,
    resource group, namespace, type and name components of the parsed
    subnet ID, and is used as the profile's ``id``; the subnet ID itself
    is passed through unchanged as the profile's ``subnet``.

    :param subnet: resource ID of the subnet, or a falsy value when no
        subnet was supplied.
    :return: a ``VirtualNetworkProfile``, or ``None`` when ``subnet`` is falsy.
    :raises CLIError: if ``subnet`` is truthy but not a valid resource ID.
    """
    from msrestazure.tools import resource_id, parse_resource_id, is_valid_resource_id
    from azure.mgmt.hdinsight.models import VirtualNetworkProfile
    from knack.util import CLIError

    if not is_valid_resource_id(subnet):
        # Something was supplied but it is not a resource ID: reject it.
        if subnet:
            raise CLIError('Invalid subnet: {}'.format(subnet))
        # Nothing was supplied: there is no profile to build.
        return None

    id_parts = parse_resource_id(subnet)
    # Re-assemble the parent resource ID from the top-level components only,
    # which drops the child (subnet) segment of the original ID.
    parent_id = resource_id(
        subscription=id_parts['subscription'],
        resource_group=id_parts['resource_group'],
        namespace=id_parts['namespace'],
        type=id_parts['type'],
        name=id_parts['name'],
    )
    return VirtualNetworkProfile(id=parent_id, subnet=subnet)
Beispiel #2
0
def create_hdi_application(cmd, client, resource_group_name, cluster_name, application_name,
                           script_uri, script_action_name, script_parameters=None, edgenode_size='Standard_D3_V2',
                           ssh_username='******', ssh_password=None, ssh_public_key=None,
                           marketplace_identifier=None, application_type='CustomApplication', tags=None,
                           https_endpoint_access_mode=None, https_endpoint_location=None,
                           https_endpoint_destination_port=8080, https_endpoint_public_port=443,
                           ssh_endpoint_location=None, ssh_endpoint_destination_port=22, ssh_endpoint_public_port=22,
                           virtual_network=None, subnet_name=None):
    """Create an application on an HDInsight cluster.

    The application is described by a single ``edgenode`` role (one instance)
    and one install script action that targets that role.

    :param cmd: CLI command context (not used by this function).
    :param client: client object whose ``create`` method submits the application.
    :param resource_group_name: name of the resource group, passed to ``client.create``.
    :param cluster_name: name of the cluster, passed to ``client.create``.
    :param application_name: name of the application, passed to ``client.create``.
    :param script_uri: URI of the install script action.
    :param script_action_name: name of the install script action.
    :param script_parameters: parameters passed to the install script action.
    :param edgenode_size: VM size used for the edge node's hardware profile.
    :param ssh_username: SSH user name for the edge node OS profile.
    :param ssh_password: SSH password for the edge node OS profile.
    :param ssh_public_key: SSH public key for the edge node OS profile.
    :param marketplace_identifier: marketplace identifier set on the application properties.
    :param application_type: application type set on the application properties.
    :param tags: tags set on the application resource.
    :param https_endpoint_access_mode: access mode of the HTTPS endpoint; must be
        given together with ``https_endpoint_location``.
    :param https_endpoint_location: location of the HTTPS endpoint.
    :param https_endpoint_destination_port: destination port of the HTTPS endpoint.
    :param https_endpoint_public_port: public port of the HTTPS endpoint.
    :param ssh_endpoint_location: location of the SSH endpoint; an SSH endpoint is
        only added when this is set.
    :param ssh_endpoint_destination_port: destination port of the SSH endpoint.
    :param ssh_endpoint_public_port: public port of the SSH endpoint.
    :param virtual_network: ID of the virtual network; must be given together
        with ``subnet_name``.
    :param subnet_name: subnet used for the virtual network profile.
    :return: whatever ``client.create`` returns.
    :raises CLIError: if only one of a paired set of arguments is supplied.
    """
    from azure.mgmt.hdinsight.models import Application, ApplicationProperties, ComputeProfile, RuntimeScriptAction, \
        Role, VirtualNetworkProfile, LinuxOperatingSystemProfile, HardwareProfile, \
        ApplicationGetHttpsEndpoint, ApplicationGetEndpoint, OsProfile

    # Validate network profile parameters
    if not _all_or_none(virtual_network, subnet_name):
        raise CLIError('Either both the virtual network and subnet should be specified, or neither should be.')

    # Specify virtual network profile only when network arguments are provided.
    # An explicit None default keeps falsy placeholders (e.g. '') out of the role.
    virtual_network_profile = None
    if virtual_network:
        virtual_network_profile = VirtualNetworkProfile(
            id=virtual_network,
            subnet=subnet_name
        )

    # Specify an OS profile only when some SSH credential is provided.
    os_profile = None
    if ssh_password or ssh_public_key:
        os_profile = OsProfile(
            linux_operating_system_profile=LinuxOperatingSystemProfile(
                username=ssh_username,
                password=ssh_password,
                ssh_public_key=ssh_public_key
            )
        )

    roles = [
        Role(
            name="edgenode",
            target_instance_count=1,
            hardware_profile=HardwareProfile(vm_size=edgenode_size),
            os_profile=os_profile,
            virtual_network_profile=virtual_network_profile
        )
    ]

    # Validate HTTPS endpoint parameters
    if not _all_or_none(https_endpoint_access_mode, https_endpoint_location):
        raise CLIError('Either both the https endpoint location and access mode should be specified, '
                       'or neither should be.')

    https_endpoints = []
    if https_endpoint_location:
        https_endpoints.append(
            ApplicationGetHttpsEndpoint(
                access_modes=[https_endpoint_access_mode],
                location=https_endpoint_location,
                destination_port=https_endpoint_destination_port,
                public_port=https_endpoint_public_port,
            )
        )

    ssh_endpoints = []
    if ssh_endpoint_location:
        ssh_endpoints.append(
            ApplicationGetEndpoint(
                location=ssh_endpoint_location,
                destination_port=ssh_endpoint_destination_port,
                public_port=ssh_endpoint_public_port
            )
        )

    application_properties = ApplicationProperties(
        compute_profile=ComputeProfile(
            roles=roles
        ),
        install_script_actions=[
            RuntimeScriptAction(
                name=script_action_name,
                uri=script_uri,
                parameters=script_parameters,
                # The install script runs on every role defined above.
                roles=[role.name for role in roles]
            )
        ],
        https_endpoints=https_endpoints,
        ssh_endpoints=ssh_endpoints,
        application_type=application_type,
        marketplace_identifier=marketplace_identifier,
    )

    create_params = Application(
        tags=tags,
        properties=application_properties
    )

    return client.create(resource_group_name, cluster_name, application_name, create_params)
Beispiel #3
0
def create_cluster(cmd,
                   client,
                   cluster_name,
                   resource_group_name,
                   location=None,
                   tags=None,
                   no_wait=False,
                   cluster_version='default',
                   cluster_type='spark',
                   cluster_tier=None,
                   cluster_configurations=None,
                   component_version=None,
                   headnode_size='large',
                   workernode_size='large',
                   zookeepernode_size=None,
                   edgenode_size=None,
                   workernode_count=3,
                   workernode_data_disks_per_node=None,
                   workernode_data_disk_storage_account_type=None,
                   workernode_data_disk_size=None,
                   http_username=None,
                   http_password=None,
                   ssh_username='******',
                   ssh_password=None,
                   ssh_public_key=None,
                   storage_account=None,
                   storage_account_key=None,
                   storage_default_container=None,
                   storage_default_filesystem=None,
                   virtual_network=None,
                   subnet_name=None):
    """Create an HDInsight cluster from command-line arguments.

    Validates the arguments, merges the HTTP (gateway) credentials into the
    cluster configuration, builds the role, storage and network profiles and
    submits the create request through ``client.create``.

    :param cmd: CLI command context; ``cmd.cli_ctx`` is used to look up the
        resource group location when ``location`` is not given.
    :param client: client object whose ``create`` method submits the cluster.
    :param cluster_name: name of the cluster; also used as the default WASB
        container name when none is given.
    :param resource_group_name: name of the resource group.
    :param location: cluster location; defaults to the resource group's location.
    :param tags: tags set on the cluster resource.
    :param no_wait: when truthy, the request is submitted through ``sdk_no_wait``.
    :param cluster_version: cluster version set on the create properties.
    :param cluster_type: cluster kind set on the cluster definition.
    :param cluster_tier: tier set on the create properties.
    :param cluster_configurations: JSON string holding a JSON object of
        configurations; HTTP credentials are read from and written to its
        ``gateway`` entry.
    :param component_version: iterable of ``component=version`` strings (the
        format is checked by a validator elsewhere).
    :param headnode_size: VM size of the two head nodes.
    :param workernode_size: VM size of the worker nodes.
    :param zookeepernode_size: VM size of the three zookeeper nodes; the role is
        only added when this is set.
    :param edgenode_size: VM size of the single edge node; the role is only
        added when this is set.
    :param workernode_count: number of worker nodes.
    :param workernode_data_disks_per_node: number of data disks per worker node.
    :param workernode_data_disk_storage_account_type: storage account type of the
        worker node data disks; requires ``workernode_data_disks_per_node``.
    :param workernode_data_disk_size: size in GB of the worker node data disks;
        requires ``workernode_data_disks_per_node``.
    :param http_username: HTTP user name; may alternatively be given in the
        gateway configuration, but not in both places.
    :param http_password: HTTP password; may alternatively be given in the
        gateway configuration, but not in both places. Required in one of them.
    :param ssh_username: SSH user name for every role.
    :param ssh_password: SSH password; defaults to the HTTP password when no SSH
        credential is given.
    :param ssh_public_key: SSH public key for every role.
    :param storage_account: default storage account.
    :param storage_account_key: key of the storage account; looked up through
        ``get_key_for_storage_account`` when omitted.
    :param storage_default_container: default container of the storage account.
    :param storage_default_filesystem: default filesystem of the storage account.
    :param virtual_network: ID of the virtual network; must be given together
        with ``subnet_name``.
    :param subnet_name: subnet used for the virtual network profile.
    :return: whatever ``client.create`` (or ``sdk_no_wait``) returns.
    :raises CLIError: if the arguments are invalid or inconsistent.
    """
    from azure.mgmt.hdinsight.models import ClusterCreateParametersExtended, ClusterCreateProperties, OSType, \
        ClusterDefinition, ComputeProfile, HardwareProfile, Role, OsProfile, LinuxOperatingSystemProfile, \
        StorageProfile, StorageAccount, VirtualNetworkProfile, DataDisksGroups

    # Update optional parameters with defaults
    additional_storage_accounts = [
    ]  # TODO: Add support for additional storage accounts
    location = location or _get_rg_location(cmd.cli_ctx, resource_group_name)

    # Format dictionary/free-form arguments
    if cluster_configurations:
        import json
        try:
            cluster_configurations = json.loads(cluster_configurations)
        except ValueError as ex:
            raise CLIError(
                'The cluster_configurations argument must be valid JSON. Error: {}'
                .format(str(ex)))
        # The configurations are indexed by name below, so anything other than a
        # JSON object would otherwise fail later with an unhelpful TypeError.
        if not isinstance(cluster_configurations, dict):
            raise CLIError(
                'The cluster_configurations argument must be a JSON object.')
    else:
        cluster_configurations = {}
    if component_version:
        # See validator
        component_version = {
            component: version
            for component, version in (entry.split('=') for entry in component_version)
        }

    # Validate whether HTTP credentials were provided
    if 'gateway' in cluster_configurations:
        gateway_config = cluster_configurations['gateway']
    else:
        gateway_config = {}
    if http_username and 'restAuthCredential.username' in gateway_config:
        raise CLIError(
            'An HTTP username must be specified either as a command-line parameter '
            'or in the cluster configuration, but not both.')
    if not http_username:
        # Implement default logic here, in case a user specifies the username in configurations.
        # Precedence: command-line value, then the configuration value, then the default.
        http_username = gateway_config.get('restAuthCredential.username') or '******'
    is_password_in_cluster_config = 'restAuthCredential.password' in gateway_config
    if http_password and is_password_in_cluster_config:
        raise CLIError(
            'An HTTP password must be specified either as a command-line parameter '
            'or in the cluster configuration, but not both.')
    if not (http_password or is_password_in_cluster_config):
        raise CLIError('An HTTP password is required.')

    # Update the cluster config with the HTTP credentials
    gateway_config[
        'restAuthCredential.isEnabled'] = 'true'  # HTTP credentials are required
    gateway_config['restAuthCredential.username'] = http_username
    http_password = http_password or gateway_config[
        'restAuthCredential.password']
    gateway_config['restAuthCredential.password'] = http_password
    cluster_configurations['gateway'] = gateway_config

    # Validate whether SSH credentials were provided
    if not (ssh_password or ssh_public_key):
        logger.warning(
            "SSH credentials not specified. Using the HTTP password as the SSH password."
        )
        ssh_password = http_password

    # Validate storage arguments from the user
    if storage_default_container and storage_default_filesystem:
        raise CLIError(
            'Either the default container or the default filesystem can be specified, but not both.'
        )

    # Attempt to infer the storage account key from the endpoint
    if not storage_account_key and storage_account:
        from .util import get_key_for_storage_account
        logger.info(
            'Storage account key not specified. Attempting to retrieve key...')
        key = get_key_for_storage_account(cmd, storage_account,
                                          resource_group_name)
        if not key:
            logger.warning(
                'Storage account key could not be inferred from storage account.'
            )
        else:
            storage_account_key = key

    # Attempt to provide a default container for WASB storage accounts
    if not storage_default_container and storage_account and _is_wasb_endpoint(
            storage_account):
        storage_default_container = cluster_name
        logger.warning('Default WASB container not specified, using "%s".',
                       storage_default_container)

    # Validate storage info parameters
    if not _all_or_none(
            storage_account, storage_account_key,
            (storage_default_container or storage_default_filesystem)):
        raise CLIError(
            'If storage details are specified, the storage account, storage account key, '
            'and either the default container or default filesystem must be specified.'
        )

    # Validate network profile parameters
    if not _all_or_none(virtual_network, subnet_name):
        raise CLIError(
            'Either both the virtual network and subnet should be specified, or neither should be.'
        )
    # Specify virtual network profile only when network arguments are provided.
    # An explicit None default keeps falsy placeholders (e.g. '') out of the roles.
    virtual_network_profile = None
    if virtual_network:
        virtual_network_profile = VirtualNetworkProfile(
            id=virtual_network, subnet=subnet_name)

    # Validate data disk parameters
    if not workernode_data_disks_per_node and workernode_data_disk_storage_account_type:
        raise CLIError(
            "Cannot define data disk storage account type unless disks per node is defined."
        )
    if not workernode_data_disks_per_node and workernode_data_disk_size:
        raise CLIError(
            "Cannot define data disk size unless disks per node is defined.")
    # Specify data disk groups only when disk arguments are provided
    workernode_data_disk_groups = None
    if workernode_data_disks_per_node:
        workernode_data_disk_groups = [
            DataDisksGroups(
                disks_per_node=workernode_data_disks_per_node,
                storage_account_type=workernode_data_disk_storage_account_type,
                disk_size_gb=workernode_data_disk_size)
        ]

    # Every role shares the same OS profile and network profile.
    os_profile = OsProfile(
        linux_operating_system_profile=LinuxOperatingSystemProfile(
            username=ssh_username,
            password=ssh_password,
            ssh_public_key=ssh_public_key))

    roles = [
        # Required roles
        Role(name="headnode",
             target_instance_count=2,
             hardware_profile=HardwareProfile(vm_size=headnode_size),
             os_profile=os_profile,
             virtual_network_profile=virtual_network_profile),
        Role(name="workernode",
             target_instance_count=workernode_count,
             hardware_profile=HardwareProfile(vm_size=workernode_size),
             os_profile=os_profile,
             virtual_network_profile=virtual_network_profile,
             data_disks_groups=workernode_data_disk_groups)
    ]
    # Optional roles, added only when a size was requested for them
    if zookeepernode_size:
        roles.append(
            Role(name="zookeepernode",
                 target_instance_count=3,
                 hardware_profile=HardwareProfile(vm_size=zookeepernode_size),
                 os_profile=os_profile,
                 virtual_network_profile=virtual_network_profile))
    if edgenode_size:
        roles.append(
            Role(name="edgenode",
                 target_instance_count=1,
                 hardware_profile=HardwareProfile(vm_size=edgenode_size),
                 os_profile=os_profile,
                 virtual_network_profile=virtual_network_profile))

    storage_accounts = []
    if storage_account:
        # Specify storage account details only when storage arguments are provided
        storage_accounts.append(
            StorageAccount(name=storage_account,
                           key=storage_account_key,
                           container=storage_default_container,
                           file_system=storage_default_filesystem,
                           is_default=True))
    if additional_storage_accounts:
        storage_accounts += [
            StorageAccount(name=s.storage_account,
                           key=s.storage_account_key,
                           container=s.container,
                           is_default=False)
            for s in additional_storage_accounts
        ]

    create_params = ClusterCreateParametersExtended(
        location=location,
        tags=tags,
        properties=ClusterCreateProperties(
            cluster_version=cluster_version,
            os_type=OSType.linux,
            tier=cluster_tier,
            cluster_definition=ClusterDefinition(
                kind=cluster_type,
                configurations=cluster_configurations,
                component_version=component_version),
            compute_profile=ComputeProfile(roles=roles),
            storage_profile=StorageProfile(storageaccounts=storage_accounts)))

    if no_wait:
        return sdk_no_wait(no_wait, client.create, resource_group_name,
                           cluster_name, create_params)

    return client.create(resource_group_name, cluster_name, create_params)
Beispiel #4
0
    def __init__(self, cluster_id, cluster_name, config, plugin_config):
        """
        Store the cluster settings, build the AAD credentials and create the
        HDInsight management client.

        :param cluster_id: the DSS identifier for this instance of cluster
        :param cluster_name: the name given by the user to this cluster
        :param config: the dict of the configuration of the object
        :param plugin_config: contains the plugin settings
        :raises ValueError: if ``config['aadAuth']`` is neither "user_pass"
            nor "service_principal"
        """
        self.cluster_id = cluster_id
        self.dss_cluster_name = cluster_name
        self.config = config
        self.plugin_config = plugin_config

        self.aad_client_credentials = None

        # TODO: check when credentials are not the right way or incorrect
        auth_mode = config['aadAuth']
        if auth_mode not in ("user_pass", "service_principal"):
            raise ValueError('Unsupported authentication method')

        if auth_mode == "user_pass":
            print("Using User Password authentication")
            self.aad_username = config['aad_username']
            self.aad_password = config['aad_password']
            self.aad_client_credentials = UserPassCredentials(
                username=self.aad_username,
                password=self.aad_password,
            )
        else:
            print('Using Service Principal for authentication')
            self.client_id = config['client_id']
            self.client_secret = config['client_secret']
            self.tenant_id = config['tenant_id']
            self.aad_client_credentials = ServicePrincipalCredentials(
                self.client_id,
                self.client_secret,
                tenant=self.tenant_id,
            )

        # Cluster parameters
        self.subscription_id = config['subscription_id']
        self.cluster_version = config['cluster_version']
        self.hdi_cluster_name = config['basename']
        self.resource_group_name = config['resource_group_name']
        self.location = config['location']
        # TODO: should retrieve available formats for output in case of error?
        self.headnode_size = config['headnode_size']
        self.worker_size = config['worker_size']
        self.worker_count = int(config['worker_count'])
        self.gateway_username = config['gateway_username']
        self.gateway_password = config['gateway_password']
        self.ssh_username = config['ssh_username']
        # TODO: implement ssh with uploaded key
        self.ssh_password = config['ssh_password']

        # Storage settings: the account name is expanded to its blob endpoint host.
        storage_host_template = '{}.blob.core.windows.net'
        self.storage_account_name = storage_host_template.format(config['storage_account'])
        self.storage_account_key = config['storage_account_key']
        self.storage_account_container = config['storage_account_container']

        # Network settings: both resource IDs are formatted from the same values.
        self.vnet_name = config['vnet_name']
        self.subnet_name = config['subnet_name']
        id_fields = {
            'subsId': self.subscription_id,
            'rgName': self.resource_group_name,
            'vnetName': self.vnet_name,
            'subnetName': self.subnet_name,
        }
        vnet_id_template = '/subscriptions/{subsId}/resourceGroups/{rgName}/providers/Microsoft.Network/virtualNetworks/{vnetName}'
        subnet_id_template = '/subscriptions/{subsId}/resourceGroups/{rgName}/providers/Microsoft.Network/virtualNetworks/{vnetName}/subnets/{subnetName}'
        self.vnet_id = vnet_id_template.format(**id_fields)
        self.subnet_id = subnet_id_template.format(**id_fields)
        self.vnet_profile = VirtualNetworkProfile(id=self.vnet_id,
                                                  subnet=self.subnet_id)

        # TODO: better test the subscription_id here ?
        self.hdi_client = HDInsightManagementClient(
            self.aad_client_credentials,
            self.subscription_id,
        )