Example #1
import os
import time

from azure.identity import DefaultAzureCredential
from azure.mgmt.hdinsight import HDInsightManagementClient
from azure.mgmt.resource import ResourceManagementClient


def main():
    SUBSCRIPTION_ID = os.environ.get("SUBSCRIPTION_ID", None)
    TIME = str(time.time()).replace('.', '')
    GROUP_NAME = "testhdinsight" + TIME

    # Create clients
    # For other authentication approaches, please see: https://pypi.org/project/azure-identity/
    resource_client = ResourceManagementClient(
        credential=DefaultAzureCredential(), subscription_id=SUBSCRIPTION_ID)

    hdinsight_client = HDInsightManagementClient(
        credential=DefaultAzureCredential(), subscription_id=SUBSCRIPTION_ID)
    # - init dependent client -
    # - end -

    # Create resource group
    resource_client.resource_groups.create_or_update(GROUP_NAME,
                                                     {"location": "eastus"})

    # - init dependent resources -
    # - end -

    # List available HDInsight operations (this sample stops short of creating a cluster)
    print("operations:")
    for operation in hdinsight_client.operations.list():
        print("\t{}".format(operation.name))

    # Delete Group
    resource_client.resource_groups.begin_delete(GROUP_NAME).result()
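Example #1 only lists operations before deleting the group. As a minimal sketch of what a create call would look like with the same track-2 client (assuming the track-2 azure-mgmt-hdinsight package, where long-running operations are exposed as begin_* pollers; cluster_params is a placeholder, not part of the original):

    # Hedged sketch: cluster_params stands in for a full
    # ClusterCreateParametersExtended payload like those built in Examples #3 and #4.
    poller = hdinsight_client.clusters.begin_create(
        GROUP_NAME, "sample-cluster", cluster_params)
    cluster = poller.result()  # blocks until provisioning completes
    print("created cluster:", cluster.name)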
Example #2
    def delete_cluster(self):
        client = HDInsightManagementClient(self.get_credential(),
                                           self.subscription_id)
        delete_poller = client.clusters.delete(self.resource_group_name,
                                               cluster_name=self.cluster_name)
        delete_poller.wait()
        return delete_poller.result()
Example #3
from azure.common.credentials import ServicePrincipalCredentials
from azure.mgmt.hdinsight import HDInsightManagementClient
from azure.mgmt.hdinsight.models import *

# Examples #4, #7 and #8 below assume the same imports.


def main():
    # Authentication
    credentials = ServicePrincipalCredentials(client_id=CLIENT_ID,
                                              secret=CLIENT_SECRET,
                                              tenant=TENANT_ID)

    client = HDInsightManagementClient(credentials, SUBSCRIPTION_ID)

    # Parse ADLS Gen2 storage account name from resource id
    adls_gen2_account_name = ADLS_GEN2_RESOURCE_ID.split('/')[-1]

    # Prepare cluster create parameters
    create_params = ClusterCreateParametersExtended(
        location=LOCATION,
        tags={},
        properties=ClusterCreateProperties(
            cluster_version="3.6",
            os_type=OSType.linux,
            tier=Tier.standard,
            cluster_definition=ClusterDefinition(
                kind="Hadoop",
                configurations={
                    "gateway": {
                        "restAuthCredential.isEnabled": "true",
                        "restAuthCredential.username": CLUSTER_LOGIN_USER_NAME,
                        "restAuthCredential.password": PASSWORD
                    }
                }),
            compute_profile=ComputeProfile(roles=[
                Role(name="headnode",
                     target_instance_count=2,
                     hardware_profile=HardwareProfile(vm_size="Large"),
                     os_profile=OsProfile(
                         linux_operating_system_profile=
                         LinuxOperatingSystemProfile(username=SSH_USER_NAME,
                                                     password=PASSWORD))),
                Role(name="workernode",
                     target_instance_count=3,
                     hardware_profile=HardwareProfile(vm_size="Large"),
                     os_profile=OsProfile(
                         linux_operating_system_profile=
                         LinuxOperatingSystemProfile(username=SSH_USER_NAME,
                                                     password=PASSWORD)))
            ]),
            storage_profile=StorageProfile(storageaccounts=[
                StorageAccount(name=adls_gen2_account_name +
                               DFS_ENDPOINT_SUFFIX,
                               is_default=True,
                               file_system=ADLS_GEN2_FILE_SYSTEM_NAME.lower(),
                               resource_id=ADLS_GEN2_RESOURCE_ID,
                               msi_resource_id=MANAGED_IDENTITY_RESOURCE_ID)
            ])),
        identity=ClusterIdentity(
            type=ResourceIdentityType.user_assigned,
            user_assigned_identities={MANAGED_IDENTITY_RESOURCE_ID: {}}))

    print(
        'Starting to create HDInsight Hadoop cluster {} with Azure Data Lake Storage Gen2'
        .format(CLUSTER_NAME))
    client.clusters.create(RESOURCE_GROUP_NAME, CLUSTER_NAME, create_params)
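Example #3 submits the create request without waiting. In this track-1 SDK, clusters.create returns a long-running-operation poller, so the last line can instead be awaited and inspected, exactly as Example #7 below does:

    create_poller = client.clusters.create(RESOURCE_GROUP_NAME, CLUSTER_NAME,
                                           create_params)
    cluster = create_poller.result()  # waits for provisioning to finish
    print('Cluster {} is {}'.format(cluster.name,
                                    cluster.properties.cluster_state))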
Example #4
def main():
    # Authentication
    credentials = ServicePrincipalCredentials(client_id=CLIENT_ID,
                                              secret=CLIENT_SECRET,
                                              tenant=TENANT_ID)

    client = HDInsightManagementClient(credentials, SUBSCRIPTION_ID)

    # Prepare cluster create parameters
    create_params = ClusterCreateParametersExtended(
        location=LOCATION,
        tags={},
        properties=ClusterCreateProperties(
            cluster_version="3.6",
            os_type=OSType.linux,
            tier=Tier.standard,
            cluster_definition=ClusterDefinition(
                kind="Kafka",
                configurations={
                    "gateway": {
                        "restAuthCredential.isEnabled": "true",
                        "restAuthCredential.username": CLUSTER_LOGIN_USER_NAME,
                        "restAuthCredential.password": PASSWORD
                    }
                }),
            compute_profile=ComputeProfile(roles=[
                Role(name="headnode",
                     target_instance_count=2,
                     hardware_profile=HardwareProfile(vm_size="Large"),
                     os_profile=OsProfile(
                         linux_operating_system_profile=
                         LinuxOperatingSystemProfile(username=SSH_USER_NAME,
                                                     password=PASSWORD))),
                Role(name="workernode",
                     target_instance_count=3,
                     hardware_profile=HardwareProfile(vm_size="Large"),
                     os_profile=OsProfile(
                         linux_operating_system_profile=
                         LinuxOperatingSystemProfile(username=SSH_USER_NAME,
                                                     password=PASSWORD)),
                     data_disks_groups=[DataDisksGroups(disks_per_node=2)]),
                Role(name="zookeepernode",
                     target_instance_count=3,
                     hardware_profile=HardwareProfile(vm_size="Small"),
                     os_profile=OsProfile(
                         linux_operating_system_profile=
                         LinuxOperatingSystemProfile(username=SSH_USER_NAME,
                                                     password=PASSWORD)))
            ]),
            storage_profile=StorageProfile(storageaccounts=[
                StorageAccount(name=STORAGE_ACCOUNT_NAME +
                               BLOB_ENDPOINT_SUFFIX,
                               key=STORAGE_ACCOUNT_KEY,
                               container=CONTAINER_NAME.lower(),
                               is_default=True)
            ])))

    print('Starting to create HDInsight Kafka cluster {}'.format(CLUSTER_NAME))
    client.clusters.create(RESOURCE_GROUP_NAME, CLUSTER_NAME, create_params)
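Once the Kafka request is in flight, the same client can fetch the cluster to check its state (clusters.get is the same call Example #6 uses); a minimal sketch:

    # Hedged sketch: inspect the cluster after submission.
    cluster = client.clusters.get(RESOURCE_GROUP_NAME, CLUSTER_NAME)
    print('Cluster state: {}'.format(cluster.properties.cluster_state))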
Example #5
    def get_conn(self):
        """
        Return a HDInsight client.

        This hook requires a service principal in order to work.
        After creating this service principal
        (Azure Active Directory/App Registrations), you need to fill in the
        client_id (Application ID) as login, the generated password as password,
        and tenantId and subscriptionId in the extra's field as a json.

        References
        https://docs.microsoft.com/en-us/azure/active-directory/develop/howto-create-service-principal-portal
        https://docs.microsoft.com/en-us/python/api/overview/azure/key-vault?toc=%2Fpython%2Fazure%2FTOC.json&view=azure-python

        :return: HDInsight manage client
        :rtype: HDInsightManagementClient
        """
        conn = self.get_connection(self.conn_id)
        extra_options = conn.extra_dejson
        key_path = extra_options.get('key_path', False)
        self.resource_group_name = str(
            extra_options.get("resource_group_name"))
        self.resource_group_location = str(
            extra_options.get("resource_group_location"))
        if key_path:
            if key_path.endswith('.json'):
                self.log.info('Getting connection using a JSON key file.')
                return get_client_from_auth_file(HDInsightManagementClient,
                                                 key_path)
            else:
                raise AirflowException('Unrecognised extension for key file.')

        if os.environ.get('AZURE_AUTH_LOCATION'):
            key_path = os.environ.get('AZURE_AUTH_LOCATION')
            if key_path.endswith('.json'):
                self.log.info('Getting connection using a JSON key file.')
                return get_client_from_auth_file(HDInsightManagementClient,
                                                 key_path)
            else:
                raise AirflowException('Unrecognised extension for key file.')

        credentials = ServicePrincipalCredentials(
            client_id=conn.login,
            secret=conn.password,
            tenant=conn.extra_dejson['tenantId'])

        subscription_id = conn.extra_dejson['subscriptionId']
        return HDInsightManagementClient(credentials, str(subscription_id))
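A minimal usage sketch for this hook; the hook class name and connection id below are hypothetical, only get_conn itself comes from the example:

hook = AzureHDInsightHook(conn_id='azure_hdinsight_default')  # hypothetical class / conn id
client = hook.get_conn()
for cluster in client.clusters.list():
    print(cluster.name)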
Example #6
import logging
from pprint import pformat

from azure.mgmt.hdinsight import HDInsightManagementClient


def make_cluster_keys_and_data(aad_credentials, subscription_id,
                               hdi_cluster_name, hdi_cluster_rg):
    # aad_credentials of type azure.common.credentials.InteractiveCredentials, ServicePrincipalCredentials, UserPassCredentials
    hdi_client = HDInsightManagementClient(aad_credentials, subscription_id)
    cluster = hdi_client.clusters.get(hdi_cluster_rg, hdi_cluster_name)
    cluster_core_info = hdi_client.configurations.get(hdi_cluster_rg,
                                                      hdi_cluster_name,
                                                      'core-site')
    logging.info('HDI client retrieved core info {}'.format(
        pformat(cluster_core_info)))

    cluster_gateway = hdi_client.configurations.get(hdi_cluster_rg,
                                                    hdi_cluster_name,
                                                    'gateway')
    try:
        ambari_user = cluster_gateway['restAuthCredential.username']
        ambari_pwd = cluster_gateway['restAuthCredential.password']
    except KeyError:
        logging.error('Could not retrieve Ambari gateway credentials')
        raise

    cluster_endpoints = cluster.properties.connectivity_endpoints
    ambari_host = [
        'https://' + e.location for e in cluster_endpoints
        if e.port == 443 and e.name == 'HTTPS'
    ][0]

    ambari_client = HdiAmbariClient(ambari_host, ambari_user, ambari_pwd)
    conf_tags = ambari_client.set_desired_configs_tags(hdi_cluster_name)
    logging.info('Updated config tags:\n {}'.format(pformat(conf_tags)))

    dss_config_builder = AbstractDSSConfigBuilder(hdi_cluster_name,
                                                  ambari_client)
    storage_info = dss_config_builder.make_storage_from_hdi_core_info(
        cluster_core_info)
    dss_config = dss_config_builder.make_dss_config(storage_info)
    logging.info('Returning DSS cluster config {}'.format(pformat(dss_config)))

    return [
        dss_config, {
            'hdiClusterId': hdi_cluster_name,
            'subscriptionId': subscription_id,
            'resourceGroupName': hdi_cluster_rg
        }
    ]
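Calling the function above requires AAD credentials and the cluster coordinates; a minimal sketch (credential values and cluster/resource-group names are illustrative):

from azure.common.credentials import ServicePrincipalCredentials

creds = ServicePrincipalCredentials(client_id=CLIENT_ID,
                                    secret=CLIENT_SECRET,
                                    tenant=TENANT_ID)
dss_config, cluster_keys = make_cluster_keys_and_data(
    creds, SUBSCRIPTION_ID, 'my-hdi-cluster', 'my-resource-group')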
Example #7
def main():
    # Authentication
    credentials = ServicePrincipalCredentials(client_id=CLIENT_ID,
                                              secret=CLIENT_SECRET,
                                              tenant=TENANT_ID)

    client = HDInsightManagementClient(credentials, SUBSCRIPTION_ID)

    # Parse AAD-DS DNS Domain name from resource id
    aadds_dns_domain_name = AADDS_RESOURCE_ID.split('/')[-1]

    # Prepare cluster create parameters
    create_params = ClusterCreateParametersExtended(
        location=LOCATION,
        tags={},
        properties=ClusterCreateProperties(
            cluster_version="3.6",
            os_type=OSType.linux,
            tier=Tier.premium,
            cluster_definition=ClusterDefinition(
                kind="Spark",
                configurations={
                    "gateway": {
                        "restAuthCredential.isEnabled": "true",
                        "restAuthCredential.username": CLUSTER_LOGIN_USER_NAME,
                        "restAuthCredential.password": PASSWORD
                    },
                    "hive-site": {
                        "javax.jdo.option.ConnectionDriverName":
                        "com.microsoft.sqlserver.jdbc.SQLServerDriver",
                        "javax.jdo.option.ConnectionURL":
                        "jdbc:sqlserver://%s;database=%s;encrypt=true;trustServerCertificate=true;create=false;loginTimeout=300"
                        .format(METASTORE_SQL_SERVER, METASTORE_SQL_DATABASE),
                        "javax.jdo.option.ConnectionUserName":
                        METASTORE_SQL_USERNAME,
                        "javax.jdo.option.ConnectionPassword":
                        METASTORE_SQL_PASSWORD,
                    },
                    "hive-env": {
                        "hive_database":
                        "Existing MSSQL Server database with SQL authentication",
                        "hive_database_name": METASTORE_SQL_DATABASE,
                        "hive_database_type": "mssql",
                        "hive_existing_mssql_server_database":
                        METASTORE_SQL_DATABASE,
                        "hive_existing_mssql_server_host":
                        METASTORE_SQL_SERVER,
                        "hive_hostname": METASTORE_SQL_SERVER,
                    },
                    "ambari-conf": {
                        "database-server": METASTORE_SQL_SERVER,
                        "database-name": AMBARI_SQL_DATABASE,
                        "database-user-name": AMBARI_SQL_USERNAME,
                        "database-user-password": AMBARI_SQL_PASSWORD,
                    },
                    "admin-properties": {
                        "audit_db_name": METASTORE_SQL_DATABASE,
                        "audit_db_user": METASTORE_SQL_USERNAME,
                        "audit_db_password": METASTORE_SQL_PASSWORD,
                        "db_name": METASTORE_SQL_DATABASE,
                        "db_user": METASTORE_SQL_USERNAME,
                        "db_password": METASTORE_SQL_PASSWORD,
                        "db_host": METASTORE_SQL_SERVER,
                        "db_root_user": "",
                        "db_root_password": ""
                    },
                    "ranger-admin-site": {
                        "ranger.jpa.jdbc.url":
                        "jdbc:sqlserver://%s;databaseName==%s".format(
                            METASTORE_SQL_SERVER, METASTORE_SQL_DATABASE)
                    },
                    "ranger-env": {
                        "ranger_privelege_user_jdbc_url":
                        "jdbc:sqlserver://%s;databaseName==%s".format(
                            METASTORE_SQL_SERVER, METASTORE_SQL_DATABASE)
                    },
                    "ranger-hive-security": {
                        "ranger.plugin.hive.service.name":
                        RANGER_HIVE_PLUGIN_SERVICE_NAME
                    },
                    "ranger-yarn-security": {
                        "ranger.plugin.yarn.service.name":
                        RANGER_HIVE_PLUGIN_SERVICE_NAME
                    }
                }),
            compute_profile=ComputeProfile(roles=[
                Role(name="headnode",
                     target_instance_count=2,
                     hardware_profile=HardwareProfile(vm_size="Large"),
                     os_profile=OsProfile(
                         linux_operating_system_profile=
                         LinuxOperatingSystemProfile(username=SSH_USER_NAME,
                                                     password=PASSWORD)),
                     virtual_network_profile=VirtualNetworkProfile(
                         id=VIRTUAL_NETWORK_RESOURCE_ID,
                         subnet='{}/subnets/{}'.format(
                             VIRTUAL_NETWORK_RESOURCE_ID, SUBNET_NAME))),
                Role(name="workernode",
                     target_instance_count=3,
                     hardware_profile=HardwareProfile(vm_size="Large"),
                     os_profile=OsProfile(
                         linux_operating_system_profile=
                         LinuxOperatingSystemProfile(username=SSH_USER_NAME,
                                                     password=PASSWORD)),
                     virtual_network_profile=VirtualNetworkProfile(
                         id=VIRTUAL_NETWORK_RESOURCE_ID,
                         subnet='{}/subnets/{}'.format(
                             VIRTUAL_NETWORK_RESOURCE_ID, SUBNET_NAME)))
            ]),
            storage_profile=StorageProfile(storageaccounts=[
                StorageAccount(name=STORAGE_ACCOUNT_NAME +
                               BLOB_ENDPOINT_SUFFIX,
                               key=STORAGE_ACCOUNT_KEY,
                               container=CONTAINER_NAME.lower(),
                               is_default=True)
            ]),
            security_profile=SecurityProfile(
                directory_type=DirectoryType.active_directory,
                ldaps_urls=[LDAPS_URL],
                domain_username=DOMAIN_USER_NAME,
                domain=aadds_dns_domain_name,
                cluster_users_group_dns=[CLUSTER_ACCESS_GROUP],
                aadds_resource_id=AADDS_RESOURCE_ID,
                msi_resource_id=MANAGED_IDENTITY_RESOURCE_ID),
            disk_encryption_properties=DiskEncryptionProperties(
                vault_uri=ENCRYPTION_VAULT_URI,
                key_name=ENCRYPTION_KEY_NAME,
                key_version=ENCRYPTION_KEY_VERSION,
                encryption_algorithm=ENCRYPTION_ALGORITHM,
                msi_resource_id=ASSIGN_IDENTITY)),
        identity=ClusterIdentity(
            type=ResourceIdentityType.user_assigned,
            user_assigned_identities={MANAGED_IDENTITY_RESOURCE_ID: {}}))

    print(
        'Starting to create HDInsight Spark cluster {} with Enterprise Security Package'
        .format(CLUSTER_NAME))
    create_poller = client.clusters.create(RESOURCE_GROUP_NAME, CLUSTER_NAME,
                                           create_params)
    cluster_response = create_poller.result()

    if (CLUSTER_NAME == cluster_response.name
            and cluster_response.id.endswith(CLUSTER_NAME)
            and cluster_response.properties.cluster_state == "Running"
            and cluster_response.type == "Microsoft.HDInsight/clusters"):
        return 0
    return 1
Example #8
def main():
    # Authentication
    credentials = ServicePrincipalCredentials(client_id=CLIENT_ID,
                                              secret=CLIENT_SECRET,
                                              tenant=TENANT_ID)

    client = HDInsightManagementClient(credentials, SUBSCRIPTION_ID)

    # Parse AAD-DS DNS Domain name from resource id
    aadds_dns_domain_name = AADDS_RESOURCE_ID.split('/')[-1]

    # Prepare cluster create parameters
    create_params = ClusterCreateParametersExtended(
        location=LOCATION,
        tags={},
        properties=ClusterCreateProperties(
            cluster_version="3.6",
            os_type=OSType.linux,
            tier=Tier.premium,
            cluster_definition=ClusterDefinition(
                kind="Spark",
                configurations={
                    "gateway": {
                        "restAuthCredential.isEnabled": "true",
                        "restAuthCredential.username": CLUSTER_LOGIN_USER_NAME,
                        "restAuthCredential.password": PASSWORD
                    }
                }),
            compute_profile=ComputeProfile(roles=[
                Role(name="headnode",
                     target_instance_count=2,
                     hardware_profile=HardwareProfile(vm_size="Large"),
                     os_profile=OsProfile(
                         linux_operating_system_profile=
                         LinuxOperatingSystemProfile(username=SSH_USER_NAME,
                                                     password=PASSWORD)),
                     virtual_network_profile=VirtualNetworkProfile(
                         id=VIRTUAL_NETWORK_RESOURCE_ID,
                         subnet='{}/subnets/{}'.format(
                             VIRTUAL_NETWORK_RESOURCE_ID, SUBNET_NAME))),
                Role(name="workernode",
                     target_instance_count=3,
                     hardware_profile=HardwareProfile(vm_size="Large"),
                     os_profile=OsProfile(
                         linux_operating_system_profile=
                         LinuxOperatingSystemProfile(username=SSH_USER_NAME,
                                                     password=PASSWORD)),
                     virtual_network_profile=VirtualNetworkProfile(
                         id=VIRTUAL_NETWORK_RESOURCE_ID,
                         subnet='{}/subnets/{}'.format(
                             VIRTUAL_NETWORK_RESOURCE_ID, SUBNET_NAME)))
            ]),
            storage_profile=StorageProfile(storageaccounts=[
                StorageAccount(name=STORAGE_ACCOUNT_NAME +
                               BLOB_ENDPOINT_SUFFIX,
                               key=STORAGE_ACCOUNT_KEY,
                               container=CONTAINER_NAME.lower(),
                               is_default=True)
            ]),
            security_profile=SecurityProfile(
                directory_type=DirectoryType.active_directory,
                ldaps_urls=[LDAPS_URL],
                domain_username=DOMAIN_USER_NAME,
                domain=aadds_dns_domain_name,
                cluster_users_group_dns=[CLUSTER_ACCESS_GROUP],
                aadds_resource_id=AADDS_RESOURCE_ID,
                msi_resource_id=MANAGED_IDENTITY_RESOURCE_ID)),
        identity=ClusterIdentity(
            type=ResourceIdentityType.user_assigned,
            user_assigned_identities={MANAGED_IDENTITY_RESOURCE_ID: {}}))

    print(
        'Starting to create HDInsight Spark cluster {} with Enterprise Security Package'
        .format(CLUSTER_NAME))
    client.clusters.create(RESOURCE_GROUP_NAME, CLUSTER_NAME, create_params)
Example #9
from azure.common.credentials import ServicePrincipalCredentials
from azure.mgmt.resource import ResourceManagementClient
from azure.mgmt.hdinsight import HDInsightManagementClient
from azure.mgmt.hdinsight.models import *
import config

SUBSCRIPTION_ID = config.azurerba['subscriptionId']

credentials = ServicePrincipalCredentials(
    client_id=config.azurerba['client_id'],
    secret=config.azurerba['client_secret'],
    tenant=config.azurerba['tenant']
)

client = HDInsightManagementClient(credentials, SUBSCRIPTION_ID)
# The name for the cluster you are creating
cluster_name = "biglakehdinsightauto"
# The name of your existing Resource Group
resource_group_name = "bdap-poc-playground"
# Choose a username
username = config.azure['username']
# Choose a password
password = config.azure['password']
# The DFS endpoint of your ADLS Gen2 storage account
storage_account = "biglakestorageccountgen2.dfs.core.windows.net"
# Storage account key you obtained above
storage_account_key = config.azurerba['storage_account_key']
# Choose a region
location = "West Europe"
Example #10
    def __init__(self, cluster_id, cluster_name, config, plugin_config):
        """
        :param cluster_id: the DSS identifier for this instance of cluster
        :param cluster_name: the name given by the user to this cluster
        :param config: the dict of the configuration of the object
        :param plugin_config: contains the plugin settings
        """
        self.cluster_id = cluster_id
        self.dss_cluster_name = cluster_name
        self.config = config
        self.plugin_config = plugin_config

        self.aad_client_credentials = None

        #TODO: handle missing or incorrect credentials
        if config['aadAuth'] == "user_pass":
            print("Using User Password authentication")
            self.aad_username = config['aad_username']
            self.aad_password = config['aad_password']
            self.aad_client_credentials = UserPassCredentials(
                username=self.aad_username, password=self.aad_password)
        elif config['aadAuth'] == "service_principal":
            print('Using Service Principal for authentication')
            self.client_id = config['client_id']
            self.client_secret = config['client_secret']
            self.tenant_id = config['tenant_id']
            self.aad_client_credentials = ServicePrincipalCredentials(
                self.client_id, self.client_secret, tenant=self.tenant_id)
        else:
            raise ValueError('Unsupported authentication method')

        #params
        self.subscription_id = config['subscription_id']
        self.cluster_version = config['cluster_version']
        self.hdi_cluster_name = config['basename']
        self.resource_group_name = config['resource_group_name']
        self.location = config['location']
        #TODO: should retrieve available formats for output in case of error?
        self.headnode_size = config['headnode_size']
        self.worker_size = config['worker_size']
        self.worker_count = int(config['worker_count'])
        self.gateway_username = config['gateway_username']
        self.gateway_password = config['gateway_password']
        self.ssh_username = config['ssh_username']
        #TODO: implement ssh with uploaded key
        self.ssh_password = config['ssh_password']

        self.storage_account_name = '{}.blob.core.windows.net'.format(
            config['storage_account'])
        self.storage_account_key = config['storage_account_key']
        self.storage_account_container = config['storage_account_container']

        self.vnet_name = config['vnet_name']
        self.subnet_name = config['subnet_name']
        self.vnet_id = '/subscriptions/{subsId}/resourceGroups/{rgName}/providers/Microsoft.Network/virtualNetworks/{vnetName}'.format(
            subsId=self.subscription_id,
            rgName=self.resource_group_name,
            vnetName=self.vnet_name)
        self.subnet_id = '/subscriptions/{subsId}/resourceGroups/{rgName}/providers/Microsoft.Network/virtualNetworks/{vnetName}/subnets/{subnetName}'.format(
            subsId=self.subscription_id,
            rgName=self.resource_group_name,
            vnetName=self.vnet_name,
            subnetName=self.subnet_name)
        self.vnet_profile = VirtualNetworkProfile(id=self.vnet_id,
                                                  subnet=self.subnet_id)

        #TODO: better to validate the subscription_id here?
        self.hdi_client = HDInsightManagementClient(
            self.aad_client_credentials, self.subscription_id)
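A usage sketch for this class; the class name is hypothetical, and the config dict simply mirrors the keys read in __init__ above:

config = {
    'aadAuth': 'service_principal',
    'client_id': CLIENT_ID,
    'client_secret': CLIENT_SECRET,
    'tenant_id': TENANT_ID,
    'subscription_id': SUBSCRIPTION_ID,
    # ...plus cluster_version, basename, resource_group_name, location,
    # headnode_size, worker_size, worker_count, gateway_*/ssh_* credentials,
    # storage_account, storage_account_key, storage_account_container,
    # vnet_name and subnet_name, exactly as read in __init__ above.
}
cluster = HDInsightDSSCluster('cluster-1', 'my-dss-cluster', config,
                              plugin_config={})  # hypothetical class name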