def test_environment():
    toolkit = Toolkit(software="spark", version="2.2.0", environment="miniconda")

    assert toolkit.software == "spark"
    assert toolkit.version == "2.2.0"
    assert toolkit.environment == "miniconda"

def _merge_dict(self, config):
    config = config.get('job')

    if config.get('id') is not None:
        self.id = config['id']

    # Cluster-level settings: VM size, toolkit, node counts and networking.
    cluster_configuration = config.get('cluster_configuration')
    if cluster_configuration:
        self.vm_size = cluster_configuration.get('vm_size')
        self.toolkit = Toolkit.from_dict(cluster_configuration.get('toolkit'))
        if cluster_configuration.get('size') is not None:
            self.max_dedicated_nodes = cluster_configuration.get('size')
        if cluster_configuration.get('size_low_priority') is not None:
            self.max_low_pri_nodes = cluster_configuration.get('size_low_priority')
        self.custom_scripts = cluster_configuration.get('custom_scripts')
        self.subnet_id = cluster_configuration.get('subnet_id')
        self.worker_on_master = cluster_configuration.get("worker_on_master")
        scheduling_target = cluster_configuration.get("scheduling_target")
        if scheduling_target:
            self.scheduling_target = SchedulingTarget(scheduling_target)

    # One ApplicationConfiguration per entry under 'applications'.
    applications = config.get('applications')
    if applications:
        self.applications = []
        for application in applications:
            self.applications.append(
                aztk.spark.models.ApplicationConfiguration(
                    name=application.get('name'),
                    application=application.get('application'),
                    application_args=application.get('application_args'),
                    main_class=application.get('main_class'),
                    jars=application.get('jars'),
                    py_files=application.get('py_files'),
                    files=application.get('files'),
                    driver_java_options=application.get('driver_java_options'),
                    driver_library_path=application.get('driver_library_path'),
                    driver_class_path=application.get('driver_class_path'),
                    driver_memory=application.get('driver_memory'),
                    executor_memory=application.get('executor_memory'),
                    driver_cores=application.get('driver_cores'),
                    executor_cores=application.get('executor_cores')))

    # Optional Spark configuration files are resolved to local paths.
    spark_configuration = config.get('spark_configuration')
    if spark_configuration:
        self.spark_defaults_conf = self.__convert_to_path(
            spark_configuration.get('spark_defaults_conf'))
        self.spark_env_sh = self.__convert_to_path(
            spark_configuration.get('spark_env_sh'))
        self.core_site_xml = self.__convert_to_path(
            spark_configuration.get('core_site_xml'))
        self.jars = [
            self.__convert_to_path(jar)
            for jar in spark_configuration.get('jars') or []
        ]

def cluster_config_from_dict(config: dict):
    # Translate the flat configuration dictionary into a ClusterConfiguration,
    # returned together with the 'wait' flag.
    output = ClusterConfiguration()
    wait = False

    if config.get('id') is not None:
        output.cluster_id = config['id']

    if config.get('vm_size') is not None:
        output.vm_size = config['vm_size']

    if config.get('size'):
        output.vm_count = config['size']

    if config.get('size_low_pri'):
        output.vm_low_pri_count = config['size_low_pri']

    if config.get('subnet_id') is not None:
        output.subnet_id = config['subnet_id']

    if config.get('username') is not None:
        output.user_configuration = UserConfiguration(username=config['username'])

        # A password is only applied when a username was provided.
        if config.get('password') is not None:
            output.user_configuration.password = config['password']

    if config.get('custom_scripts') not in [[None], None]:
        output.custom_scripts = []
        for custom_script in config['custom_scripts']:
            output.custom_scripts.append(
                aztk.spark.models.CustomScript(
                    script=custom_script['script'], run_on=custom_script['runOn']))

    if config.get('azure_files') not in [[None], None]:
        output.file_shares = []
        for file_share in config['azure_files']:
            output.file_shares.append(
                aztk.spark.models.FileShare(
                    storage_account_name=file_share['storage_account_name'],
                    storage_account_key=file_share['storage_account_key'],
                    file_share_path=file_share['file_share_path'],
                    mount_path=file_share['mount_path'],
                ))

    if config.get('toolkit') is not None:
        output.toolkit = Toolkit.from_dict(config['toolkit'])

    if config.get('plugins') not in [[None], None]:
        output.plugins = []
        for plugin in config['plugins']:
            ref = PluginReference.from_dict(plugin)
            output.plugins.append(ref.get_plugin())

    if config.get('worker_on_master') is not None:
        output.worker_on_master = config['worker_on_master']

    if config.get('wait') is not None:
        wait = config['wait']

    return output, wait

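# Hedged usage sketch (not part of the original source): an illustrative flat
# dictionary for cluster_config_from_dict, using only keys the function reads
# above. All values are placeholders, not recommended settings.
example_cluster_config = {
    'id': 'example-cluster',
    'vm_size': 'standard_a2',
    'size': 2,
    'size_low_pri': 1,
    'username': 'spark-user',
    'toolkit': {'software': 'spark', 'version': '2.2.0'},
    'wait': True,
}

example_output, example_wait = cluster_config_from_dict(example_cluster_config)
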
def _merge_dict(self, config): config = config.get("job") if config.get("id") is not None: self.id = config["id"] cluster_configuration = config.get("cluster_configuration") if cluster_configuration: self.vm_size = cluster_configuration.get("vm_size") self.toolkit = Toolkit.from_dict( cluster_configuration.get("toolkit")) if cluster_configuration.get("size") is not None: self.max_dedicated_nodes = cluster_configuration.get("size") if cluster_configuration.get("size_low_priority") is not None: self.max_low_pri_nodes = cluster_configuration.get( "size_low_priority") self.subnet_id = cluster_configuration.get("subnet_id") self.worker_on_master = cluster_configuration.get( "worker_on_master") scheduling_target = cluster_configuration.get("scheduling_target") if scheduling_target: self.scheduling_target = SchedulingTarget(scheduling_target) applications = config.get("applications") if applications: self.applications = [] for application in applications: self.applications.append( aztk.spark.models.ApplicationConfiguration( name=application.get("name"), application=application.get("application"), application_args=application.get("application_args"), main_class=application.get("main_class"), jars=application.get("jars"), py_files=application.get("py_files"), files=application.get("files"), driver_java_options=application.get( "driver_java_options"), driver_library_path=application.get( "driver_library_path"), driver_class_path=application.get("driver_class_path"), driver_memory=application.get("driver_memory"), executor_memory=application.get("executor_memory"), driver_cores=application.get("driver_cores"), executor_cores=application.get("executor_cores"), )) spark_configuration = config.get("spark_configuration") if spark_configuration: self.spark_defaults_conf = _convert_to_path( spark_configuration.get("spark_defaults_conf")) self.spark_env_sh = _convert_to_path( spark_configuration.get("spark_env_sh")) self.core_site_xml = _convert_to_path( spark_configuration.get("core_site_xml")) self.jars = [ _convert_to_path(jar) for jar in spark_configuration.get("jars") or [] ]
def execute(args: typing.NamedTuple):
    if not args.toolkit_software:
        return print_available_softwares()

    if not validate_software(args.toolkit_software):
        return None

    if not args.version:
        return print_available_software_version(args.toolkit_software)

    if not args.environment:
        print_available_environments(args.toolkit_software)

    toolkit = Toolkit(software=args.toolkit_software, version=args.version, environment=args.environment)

    toolkit.validate()
    log.info("Docker image picked for this toolkit: %s", toolkit.get_docker_repo(args.gpu))
    return None

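# Hedged usage sketch (not part of the original source): a hypothetical Args
# NamedTuple mirroring only the attributes execute() reads above, so the
# handler can be exercised without the CLI parser. `typing` is already
# imported for the annotation on execute().
class _ExampleArgs(typing.NamedTuple):
    toolkit_software: str = "spark"
    version: str = "2.2.0"
    environment: str = "miniconda"
    gpu: bool = False


execute(_ExampleArgs())
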
def test_scheduling_target_dedicated_with_no_dedicated_nodes_raise_error():
    with pytest.raises(InvalidModelError, match="Scheduling target cannot be Dedicated if dedicated vm size is 0"):
        conf = ClusterConfiguration(
            cluster_id="abc",
            scheduling_target=SchedulingTarget.Dedicated,
            vm_size="standard_a2",
            size=0,
            size_low_priority=2,
            toolkit=Toolkit(software="spark", version="1.6.3"),
        )
        conf.validate()

def test_valid_software_and_version():
    Toolkit(software="spark", version="2.2.0").validate()

def test_missing_version_raise_error():
    with pytest.raises(InvalidModelError):
        Toolkit(software="spark", version=None).validate()

def test_basic_toolkit():
    toolkit = Toolkit(software="spark", version="2.2.0")

    assert toolkit.software == "spark"
    assert toolkit.version == "2.2.0"

def test_get_right_docker_repo_with_env_for_gpu():
    repo = Toolkit(software="spark", version="2.2.0", environment="miniconda").get_docker_repo(True)

    assert repo == "aztk/spark:v{0}-spark2.2.0-miniconda-gpu".format(docker_image_version)

def test_get_right_docker_repo():
    repo = Toolkit(software="spark", version="2.2.0").get_docker_repo(False)

    assert repo == "aztk/spark:v{0}-spark2.2.0-base".format(docker_image_version)

def test_get_right_docker_repo_for_gpu():
    repo = Toolkit(software="spark", version="2.1.0").get_docker_repo(True)

    assert repo == "aztk/spark:v{0}-spark2.1.0-gpu".format(docker_image_version)

def test_invalid_environment_version_raise_error():
    with pytest.raises(InvalidModelError):
        Toolkit(software="spark", version="2.2.0", environment="miniconda", environment_version="7.1.9").validate()

def test_invalid_version_raise_error():
    with pytest.raises(InvalidModelError):
        Toolkit(software="spark", version="780.0").validate()

def test_valid_software_version_and_environment():
    Toolkit(software="spark", version="2.2.0", environment="miniconda").validate()

def test_invalid_software_raise_error():
    with pytest.raises(InvalidModelError):
        Toolkit(software="non-supported", version="2.2.0").validate()

def test_missing_software_raise_error():
    with pytest.raises(InvalidModelError):
        Toolkit(software=None, version="2.2.0").validate()
