def cluster_config_from_dict(config: dict):
    output = ClusterConfiguration()
    wait = False
    if config.get('id') is not None:
        output.cluster_id = config['id']

    if config.get('vm_size') is not None:
        output.vm_size = config['vm_size']

    if config.get('size'):
        output.vm_count = config['size']

    if config.get('size_low_pri'):
        output.vm_low_pri_count = config['size_low_pri']

    if config.get('subnet_id') is not None:
        output.subnet_id = config['subnet_id']

    if config.get('username') is not None:
        output.user_configuration = UserConfiguration(username=config['username'])

        if config.get('password') is not None:
            output.user_configuration.password = config['password']

    if config.get('custom_scripts') not in [[None], None]:
        output.custom_scripts = []
        for custom_script in config['custom_scripts']:
            output.custom_scripts.append(
                aztk.spark.models.CustomScript(
                    script=custom_script['script'],
                    run_on=custom_script['runOn']))

    if config.get('azure_files') not in [[None], None]:
        output.file_shares = []
        for file_share in config['azure_files']:
            output.file_shares.append(
                aztk.spark.models.FileShare(
                    storage_account_name=file_share['storage_account_name'],
                    storage_account_key=file_share['storage_account_key'],
                    file_share_path=file_share['file_share_path'],
                    mount_path=file_share['mount_path'],
                ))

    if config.get('docker_repo') is not None:
        output.docker_repo = config['docker_repo']

    if config.get('plugins') not in [[None], None]:
        output.plugins = []
        for plugin in config['plugins']:
            ref = PluginReference.from_dict(plugin)
            output.plugins.append(ref.get_plugin())

    if config.get('worker_on_master') is not None:
        output.worker_on_master = config['worker_on_master']

    if config.get('wait') is not None:
        wait = config['wait']

    return output, wait
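# Usage sketch (illustrative only): the dict below mirrors the keys that
# cluster_config_from_dict reads from a parsed cluster.yaml. The values
# here are hypothetical, not taken from any real configuration file.
example_config = {
    'id': 'my-cluster',
    'vm_size': 'standard_d2_v2',
    'size': 2,
    'username': 'spark',
    'custom_scripts': [
        {'script': 'scripts/setup.sh', 'runOn': 'all-nodes'},
    ],
    'wait': True,
}
cluster_config, wait = cluster_config_from_dict(example_config)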
def execute(args: typing.NamedTuple):
    spark_client = aztk.spark.Client(config.load_aztk_secrets())
    cluster_conf = ClusterConfiguration()
    cluster_conf.spark_configuration = load_aztk_spark_config(args.spark_conf)

    # read cluster.yaml configuration file, overwrite values with args
    file_config, wait = config.read_cluster_config() if args.cluster_path is None \
        else config.read_cluster_config(args.cluster_path)
    cluster_conf.merge(file_config)
    cluster_conf.merge(
        ClusterConfiguration(
            cluster_id=args.cluster_id,
            size=args.size,
            size_low_priority=args.size_low_priority,
            vm_size=args.vm_size,
            subnet_id=args.subnet_id,
            user_configuration=UserConfiguration(username=args.username, password=args.password),
        ))

    if cluster_conf.toolkit:
        if args.docker_repo:
            cluster_conf.toolkit.docker_repo = args.docker_repo
        if args.docker_run_options:
            cluster_conf.toolkit.docker_run_options = args.docker_run_options

    wait = wait if args.wait is None else args.wait

    user_configuration = cluster_conf.user_configuration
    if user_configuration and user_configuration.username:
        ssh_key, password = utils.get_ssh_key_or_prompt(
            spark_client.secrets_configuration.ssh_pub_key,
            user_configuration.username,
            user_configuration.password,
            spark_client.secrets_configuration,
        )
        cluster_conf.user_configuration = aztk.spark.models.UserConfiguration(
            username=user_configuration.username, password=password, ssh_key=ssh_key)
    else:
        cluster_conf.user_configuration = None

    cluster_conf.validate()
    utils.print_cluster_conf(cluster_conf, wait)

    with utils.Spinner():
        # create spark cluster
        cluster = spark_client.cluster.create(
            cluster_configuration=cluster_conf, vm_ver=args.vm_os_ver, wait=wait)

    if wait:
        log.info("Cluster %s created successfully.", cluster.id)
    else:
        log.info("Cluster %s is being provisioned.", cluster.id)
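# For reference, a sketch of the shape execute() expects for `args`.
# The field names are taken from the function body above; the Optional
# types and None defaults are assumptions for illustration, not the
# CLI's actual argument parser.
from typing import NamedTuple, Optional

class ClusterCreateArgs(NamedTuple):
    cluster_id: Optional[str] = None
    size: Optional[int] = None
    size_low_priority: Optional[int] = None
    vm_size: Optional[str] = None
    subnet_id: Optional[str] = None
    username: Optional[str] = None
    password: Optional[str] = None
    docker_repo: Optional[str] = None
    docker_run_options: Optional[str] = None
    spark_conf: Optional[str] = None
    cluster_path: Optional[str] = None
    vm_os_ver: Optional[str] = None
    wait: Optional[bool] = None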
def execute(args: typing.NamedTuple):
    spark_client = aztk.spark.Client(config.load_aztk_secrets())
    cluster_conf = ClusterConfiguration()
    cluster_conf.spark_configuration = load_aztk_spark_config()

    # read cluster.yaml configuration file, overwrite values with args
    file_config, wait = config.read_cluster_config()
    cluster_conf.merge(file_config)

    if args.size_low_pri is not None:
        deprecate("0.9.0", "--size-low-pri has been deprecated.", "Please use --size-low-priority.")
        args.size_low_priority = args.size_low_pri

    cluster_conf.merge(ClusterConfiguration(
        cluster_id=args.cluster_id,
        size=args.size,
        size_low_priority=args.size_low_priority,
        vm_size=args.vm_size,
        subnet_id=args.subnet_id,
        user_configuration=UserConfiguration(
            username=args.username,
            password=args.password,
        )))

    if args.docker_repo and cluster_conf.toolkit:
        cluster_conf.toolkit.docker_repo = args.docker_repo

    wait = wait if args.wait is None else args.wait

    user_configuration = cluster_conf.user_configuration
    if user_configuration and user_configuration.username:
        ssh_key, password = utils.get_ssh_key_or_prompt(spark_client.secrets_config.ssh_pub_key,
                                                        user_configuration.username,
                                                        user_configuration.password,
                                                        spark_client.secrets_config)
        cluster_conf.user_configuration = aztk.spark.models.UserConfiguration(
            username=user_configuration.username,
            password=password,
            ssh_key=ssh_key
        )
    else:
        cluster_conf.user_configuration = None

    cluster_conf.validate()
    utils.print_cluster_conf(cluster_conf, wait)

    with utils.Spinner():
        # create spark cluster
        cluster = spark_client.create_cluster(
            cluster_conf,
            wait=wait
        )

    if wait:
        log.info("Cluster %s created successfully.", cluster.id)
    else:
        log.info("Cluster %s is being provisioned.", cluster.id)
def createCluster(self):
    # create a client
    client = aztk.spark.Client(self.secrets_confg)

    # list available clusters
    clusters = client.list_clusters()

    SPARK_CONFIG_PATH = os.path.normpath(os.path.join(os.path.dirname(__file__), 'spark', 'spark', '.config'))
    SPARK_JARS_PATH = os.path.normpath(os.path.join(os.path.dirname(__file__), 'spark', 'spark', 'jars'))
    SPARK_CORE_SITE = os.path.join(SPARK_CONFIG_PATH, 'core-site.xml')
    jars = glob.glob(os.path.join(SPARK_JARS_PATH, '*.jar'))

    # define spark configuration
    spark_conf = aztk.spark.models.SparkConfiguration(
        spark_defaults_conf=os.path.join(SPARK_CONFIG_PATH, 'spark-defaults.conf'),
        spark_env_sh=os.path.join(SPARK_CONFIG_PATH, 'spark-env.sh'),
        core_site_xml=SPARK_CORE_SITE,
        jars=jars
    )

    clusterDetails = self.table_service.get_entity('cluster', 'predictivemaintenance', 'predictivemaintenance')
    cluster_number = int(clusterDetails.ClusterNumber) + 1
    cluster_id = clusterDetails.PartitionKey + str(cluster_number)

    jupyterCustomScript = aztk.models.CustomScript(
        "jupyter", "D:/home/site/wwwroot/flask/spark/customScripts/jupyter.sh", "all-nodes")
    azuremlProjectFileShare = aztk.models.FileShare(
        self.STORAGE_ACCOUNT_NAME, self.STORAGE_ACCOUNT_KEY, 'azureml-project', '/mnt/azureml-project')
    azuremlFileShare = aztk.models.FileShare(
        self.STORAGE_ACCOUNT_NAME, self.STORAGE_ACCOUNT_KEY, 'azureml-share', '/mnt/azureml-share')

    # configure my cluster
    cluster_config = aztk.spark.models.ClusterConfiguration(
        docker_repo='aztk/python:spark2.2.0-python3.6.2-base',
        cluster_id=cluster_id,  # Warning: this name must be a valid Azure Blob Storage container name
        vm_count=self.vm_count,
        # vm_low_pri_count=2,  # this and vm_count are mutually exclusive
        vm_size=self.sku_type,
        custom_scripts=[jupyterCustomScript],
        spark_configuration=spark_conf,
        file_shares=[azuremlProjectFileShare, azuremlFileShare],
        user_configuration=UserConfiguration(
            username=self.username,
            password=self.password,
        )
    )

    try:
        cluster = client.create_cluster(cluster_config)
    except Exception as e:
        clusterDetails = {
            'PartitionKey': 'predictivemaintenance',
            'RowKey': 'predictivemaintenance',
            'Status': ClusterStatus.Failed,
            'UserName': self.username,
            'ClusterNumber': cluster_number,
            'Message': str(e),
        }
        self.table_service.insert_or_merge_entity('cluster', clusterDetails)
        return

    clusterDetails = {
        'PartitionKey': 'predictivemaintenance',
        'RowKey': 'predictivemaintenance',
        'Status': ClusterStatus.Provisioning,
        'UserName': self.username,
        'ClusterNumber': cluster_number,
    }
    self.table_service.insert_or_merge_entity('cluster', clusterDetails)
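# Hypothetical driver for createCluster above. `ClusterManager` is a
# stand-in name for whatever class actually owns secrets_confg,
# table_service, username, password, vm_count, and sku_type in the
# Flask app; it is not defined in this code.
manager = ClusterManager()
manager.createCluster()

# Provisioning status is recorded in the 'cluster' table, so a caller can poll it:
details = manager.table_service.get_entity(
    'cluster', 'predictivemaintenance', 'predictivemaintenance')
print(details.Status, details.ClusterNumber)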
def execute(args: typing.NamedTuple):
    spark_client = aztk.spark.Client(config.load_aztk_secrets())
    cluster_conf = ClusterConfiguration()
    cluster_conf.spark_configuration = load_aztk_spark_config()

    # read cluster.yaml configuration file, overwrite values with args
    file_config, wait = config.read_cluster_config()
    cluster_conf.merge(file_config)
    cluster_conf.merge(ClusterConfiguration(
        cluster_id=args.cluster_id,
        vm_count=args.size,
        vm_low_pri_count=args.size_low_pri,
        vm_size=args.vm_size,
        subnet_id=args.subnet_id,
        user_configuration=UserConfiguration(
            username=args.username,
            password=args.password,
        ),
        docker_repo=args.docker_repo))

    wait = wait if args.wait is None else args.wait

    user_configuration = cluster_conf.user_configuration
    if user_configuration and user_configuration.username:
        ssh_key, password = utils.get_ssh_key_or_prompt(spark_client.secrets_config.ssh_pub_key,
                                                        user_configuration.username,
                                                        user_configuration.password,
                                                        spark_client.secrets_config)
        cluster_conf.user_configuration = aztk.spark.models.UserConfiguration(
            username=user_configuration.username,
            password=password,
            ssh_key=ssh_key
        )
    else:
        cluster_conf.user_configuration = None

    utils.print_cluster_conf(cluster_conf, wait)
    spinner = utils.Spinner()
    spinner.start()

    # create spark cluster
    cluster = spark_client.create_cluster(
        cluster_conf,
        wait=wait
    )

    spinner.stop()

    if wait:
        log.info("Cluster %s created successfully.", cluster.id)
    else:
        log.info("Cluster %s is being provisioned.", cluster.id)
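# Caveat with the manual start/stop pattern above: if create_cluster
# raises, spinner.stop() never runs and the terminal is left spinning.
# A minimal defensive variant, assuming utils.Spinner keeps the same
# start/stop interface:
spinner = utils.Spinner()
spinner.start()
try:
    cluster = spark_client.create_cluster(cluster_conf, wait=wait)
finally:
    spinner.stop()  # clear the spinner even if creation fails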