Example 1
def cluster_config_from_dict(config: dict):
    """Translate a raw configuration dict (parsed cluster.yaml) into a
    ClusterConfiguration plus the boolean ``wait`` flag.

    Keys that are absent (or explicitly null) leave the corresponding
    attribute at its ClusterConfiguration default.
    """
    result = ClusterConfiguration()
    should_wait = False

    # Identity / sizing fields: null means "not specified".
    if config.get('id') is not None:
        result.cluster_id = config['id']
    if config.get('vm_size') is not None:
        result.vm_size = config['vm_size']

    # Node counts use truthiness, so 0 is treated the same as absent.
    if config.get('size'):
        result.vm_count = config['size']
    if config.get('size_low_pri'):
        result.vm_low_pri_count = config['size_low_pri']

    if config.get('subnet_id') is not None:
        result.subnet_id = config['subnet_id']

    # A username enables user configuration; a password is only attached
    # when a username was given.
    if config.get('username') is not None:
        result.user_configuration = UserConfiguration(
            username=config['username'])
        if config.get('password') is not None:
            result.user_configuration.password = config['password']

    # List-valued sections: yaml may yield [None] for an empty section,
    # so both None and [None] are treated as "nothing configured".
    if config.get('custom_scripts') not in [[None], None]:
        result.custom_scripts = [
            aztk.spark.models.CustomScript(script=entry['script'],
                                           run_on=entry['runOn'])
            for entry in config['custom_scripts']
        ]

    if config.get('azure_files') not in [[None], None]:
        result.file_shares = [
            aztk.spark.models.FileShare(
                storage_account_name=share['storage_account_name'],
                storage_account_key=share['storage_account_key'],
                file_share_path=share['file_share_path'],
                mount_path=share['mount_path'],
            )
            for share in config['azure_files']
        ]

    if config.get('docker_repo') is not None:
        result.docker_repo = config['docker_repo']

    if config.get('plugins') not in [[None], None]:
        result.plugins = [
            PluginReference.from_dict(plugin).get_plugin()
            for plugin in config['plugins']
        ]

    if config.get('worker_on_master') is not None:
        result.worker_on_master = config['worker_on_master']

    if config.get('wait') is not None:
        should_wait = config['wait']

    return result, should_wait
Example 2
def execute(args: typing.NamedTuple):
    """Create a Spark cluster: merge cluster.yaml with CLI overrides,
    resolve user credentials, then submit the create request."""
    client = aztk.spark.Client(config.load_aztk_secrets())

    conf = ClusterConfiguration()
    conf.spark_configuration = load_aztk_spark_config(args.spark_conf)

    # Base configuration comes from cluster.yaml (default location unless
    # an explicit path was supplied on the command line).
    if args.cluster_path is None:
        file_config, wait = config.read_cluster_config()
    else:
        file_config, wait = config.read_cluster_config(args.cluster_path)
    conf.merge(file_config)

    # CLI arguments take precedence over values from the file.
    conf.merge(
        ClusterConfiguration(
            cluster_id=args.cluster_id,
            size=args.size,
            size_low_priority=args.size_low_priority,
            vm_size=args.vm_size,
            subnet_id=args.subnet_id,
            user_configuration=UserConfiguration(username=args.username,
                                                 password=args.password),
        ))

    # Docker overrides only make sense once a toolkit has been configured.
    if conf.toolkit:
        if args.docker_repo:
            conf.toolkit.docker_repo = args.docker_repo
        if args.docker_run_options:
            conf.toolkit.docker_run_options = args.docker_run_options

    if args.wait is not None:
        wait = args.wait

    user_conf = conf.user_configuration
    if user_conf and user_conf.username:
        # Resolve the SSH key / password (prompting the user if needed).
        ssh_key, password = utils.get_ssh_key_or_prompt(
            client.secrets_configuration.ssh_pub_key,
            user_conf.username,
            user_conf.password,
            client.secrets_configuration,
        )
        conf.user_configuration = aztk.spark.models.UserConfiguration(
            username=user_conf.username,
            password=password,
            ssh_key=ssh_key)
    else:
        conf.user_configuration = None

    conf.validate()
    utils.print_cluster_conf(conf, wait)

    # Submit the cluster creation request with a progress spinner.
    with utils.Spinner():
        cluster = client.cluster.create(
            cluster_configuration=conf,
            vm_ver=args.vm_os_ver,
            wait=wait)

    if wait:
        log.info("Cluster %s created successfully.", cluster.id)
    else:
        log.info("Cluster %s is being provisioned.", cluster.id)
Example 3
def execute(args: typing.NamedTuple):
    """Create a Spark cluster from cluster.yaml merged with CLI arguments,
    handling the deprecated --size-low-pri flag."""
    client = aztk.spark.Client(config.load_aztk_secrets())

    conf = ClusterConfiguration()
    conf.spark_configuration = load_aztk_spark_config()

    # Base values come from cluster.yaml; CLI arguments override them below.
    file_config, wait = config.read_cluster_config()
    conf.merge(file_config)

    # Map the deprecated flag onto its replacement before merging.
    if args.size_low_pri is not None:
        deprecate("0.9.0", "--size-low-pri has been deprecated.", "Please use --size-low-priority.")
        args.size_low_priority = args.size_low_pri

    conf.merge(
        ClusterConfiguration(
            cluster_id=args.cluster_id,
            size=args.size,
            size_low_priority=args.size_low_priority,
            vm_size=args.vm_size,
            subnet_id=args.subnet_id,
            user_configuration=UserConfiguration(username=args.username,
                                                 password=args.password),
        ))

    # Docker repo override requires a toolkit to already be configured.
    if args.docker_repo and conf.toolkit:
        conf.toolkit.docker_repo = args.docker_repo

    if args.wait is not None:
        wait = args.wait

    user_conf = conf.user_configuration
    if user_conf and user_conf.username:
        # Resolve SSH key / password (may prompt interactively).
        ssh_key, password = utils.get_ssh_key_or_prompt(
            client.secrets_config.ssh_pub_key,
            user_conf.username,
            user_conf.password,
            client.secrets_config)
        conf.user_configuration = aztk.spark.models.UserConfiguration(
            username=user_conf.username,
            password=password,
            ssh_key=ssh_key)
    else:
        conf.user_configuration = None

    conf.validate()
    utils.print_cluster_conf(conf, wait)

    # Submit the creation request behind a progress spinner.
    with utils.Spinner():
        cluster = client.create_cluster(conf, wait=wait)

    if wait:
        log.info("Cluster %s created successfully.", cluster.id)
    else:
        log.info("Cluster %s is being provisioned.", cluster.id)
Example 4
    def createCluster(self):
        """Provision a new aztk Spark cluster and record its status in the
        'cluster' Azure table.

        The cluster id is derived from the previous ClusterNumber stored in
        the table; success or failure is written back as a table entity.
        """
        # Client built from secrets held on this instance.
        client = aztk.spark.Client(self.secrets_confg)

        # NOTE(review): result is unused; presumably kept as a connectivity
        # check against the service — confirm before removing.
        available_clusters = client.list_clusters()

        # Locate the bundled Spark configuration and extra jars.
        base_dir = os.path.dirname(__file__)
        config_dir = os.path.normpath(os.path.join(base_dir, 'spark', 'spark', '.config'))
        jars_dir = os.path.normpath(os.path.join(base_dir, 'spark', 'spark', 'jars'))
        core_site_xml = os.path.join(config_dir, 'core-site.xml')
        jar_files = glob.glob(os.path.join(jars_dir, '*.jar'))

        spark_conf = aztk.spark.models.SparkConfiguration(
            spark_defaults_conf=os.path.join(config_dir, 'spark-defaults.conf'),
            spark_env_sh=os.path.join(config_dir, 'spark-env.sh'),
            core_site_xml=core_site_xml,
            jars=jar_files)

        # Derive the next cluster id from the counter stored in the table.
        previous = self.table_service.get_entity('cluster', 'predictivemaintenance', 'predictivemaintenance')
        next_number = int(previous.ClusterNumber) + 1
        new_cluster_id = previous.PartitionKey + str(next_number)

        jupyter_script = aztk.models.CustomScript("jupyter", "D:/home/site/wwwroot/flask/spark/customScripts/jupyter.sh", "all-nodes")
        project_share = aztk.models.FileShare(self.STORAGE_ACCOUNT_NAME, self.STORAGE_ACCOUNT_KEY, 'azureml-project', '/mnt/azureml-project')
        shared_share = aztk.models.FileShare(self.STORAGE_ACCOUNT_NAME, self.STORAGE_ACCOUNT_KEY, 'azureml-share', '/mnt/azureml-share')

        cluster_config = aztk.spark.models.ClusterConfiguration(
            docker_repo='aztk/python:spark2.2.0-python3.6.2-base',
            cluster_id=new_cluster_id,  # Warning: must be a valid Azure Blob Storage container name
            vm_count=self.vm_count,
            # vm_low_pri_count is mutually exclusive with vm_count
            vm_size=self.sku_type,
            custom_scripts=[jupyter_script],
            spark_configuration=spark_conf,
            file_shares=[project_share, shared_share],
            user_configuration=UserConfiguration(
                username=self.username,
                password=self.password,
            ))

        # Shared fields for the status entity written in either outcome.
        status_entity = {
            'PartitionKey': 'predictivemaintenance',
            'RowKey': 'predictivemaintenance',
            'UserName': self.username,
            'ClusterNumber': next_number,
        }
        try:
            client.create_cluster(cluster_config)
        except Exception as e:
            # Record the failure (including the error text) and bail out.
            status_entity['Status'] = ClusterStatus.Failed
            status_entity['Message'] = str(e)
            self.table_service.insert_or_merge_entity('cluster', status_entity)
            return

        status_entity['Status'] = ClusterStatus.Provisioning
        self.table_service.insert_or_merge_entity('cluster', status_entity)
Example 5
def execute(args: typing.NamedTuple):
    """Create a Spark cluster from cluster.yaml, overridden by CLI args.

    Reads the base configuration from cluster.yaml, merges command-line
    values on top, resolves the user's SSH key/password, and submits the
    cluster creation request.
    """
    spark_client = aztk.spark.Client(config.load_aztk_secrets())
    cluster_conf = ClusterConfiguration()
    cluster_conf.spark_configuration = load_aztk_spark_config()

    # read cluster.yaml configuration file, overwrite values with args
    file_config, wait = config.read_cluster_config()
    cluster_conf.merge(file_config)
    cluster_conf.merge(ClusterConfiguration(
        cluster_id=args.cluster_id,
        vm_count=args.size,
        vm_low_pri_count=args.size_low_pri,
        vm_size=args.vm_size,
        subnet_id=args.subnet_id,
        user_configuration=UserConfiguration(
            username=args.username,
            password=args.password,
        ),
        docker_repo=args.docker_repo))
    # CLI --wait overrides the file's wait flag only when explicitly given.
    wait = wait if args.wait is None else args.wait

    user_configuration = cluster_conf.user_configuration

    if user_configuration and user_configuration.username:
        # Resolve SSH key / password, prompting the user if necessary.
        ssh_key, password = utils.get_ssh_key_or_prompt(spark_client.secrets_config.ssh_pub_key,
                                                        user_configuration.username,
                                                        user_configuration.password,
                                                        spark_client.secrets_config)
        cluster_conf.user_configuration = aztk.spark.models.UserConfiguration(
            username=user_configuration.username,
            password=password,
            ssh_key=ssh_key
        )
    else:
        cluster_conf.user_configuration = None

    utils.print_cluster_conf(cluster_conf, wait)

    # FIX: the spinner was previously started and stopped manually, so an
    # exception raised by create_cluster left the spinner running forever.
    # try/finally guarantees it is stopped on every path.
    spinner = utils.Spinner()
    spinner.start()
    try:
        # create spark cluster
        cluster = spark_client.create_cluster(
            cluster_conf,
            wait=wait
        )
    finally:
        spinner.stop()

    if wait:
        log.info("Cluster %s created successfully.", cluster.id)
    else:
        log.info("Cluster %s is being provisioned.", cluster.id)