def _merge_dict(self, config): config = config.get('job') if config.get('id') is not None: self.id = config['id'] cluster_configuration = config.get('cluster_configuration') if cluster_configuration: self.vm_size = cluster_configuration.get('vm_size') self.toolkit = Toolkit.from_dict( cluster_configuration.get('toolkit')) if cluster_configuration.get('size') is not None: self.max_dedicated_nodes = cluster_configuration.get('size') if cluster_configuration.get('size_low_priority') is not None: self.max_low_pri_nodes = cluster_configuration.get( 'size_low_priority') self.custom_scripts = cluster_configuration.get('custom_scripts') self.subnet_id = cluster_configuration.get('subnet_id') self.worker_on_master = cluster_configuration.get( "worker_on_master") scheduling_target = cluster_configuration.get("scheduling_target") if scheduling_target: self.scheduling_target = SchedulingTarget(scheduling_target) applications = config.get('applications') if applications: self.applications = [] for application in applications: self.applications.append( aztk.spark.models.ApplicationConfiguration( name=application.get('name'), application=application.get('application'), application_args=application.get('application_args'), main_class=application.get('main_class'), jars=application.get('jars'), py_files=application.get('py_files'), files=application.get('files'), driver_java_options=application.get( 'driver_java_options'), driver_library_path=application.get( 'driver_library_path'), driver_class_path=application.get('driver_class_path'), driver_memory=application.get('driver_memory'), executor_memory=application.get('executor_memory'), driver_cores=application.get('driver_cores'), executor_cores=application.get('executor_cores'))) spark_configuration = config.get('spark_configuration') if spark_configuration: self.spark_defaults_conf = self.__convert_to_path( spark_configuration.get('spark_defaults_conf')) self.spark_env_sh = self.__convert_to_path( 
spark_configuration.get('spark_env_sh')) self.core_site_xml = self.__convert_to_path( spark_configuration.get('core_site_xml')) self.jars = [ self.__convert_to_path(jar) for jar in spark_configuration.get('jars') or [] ]
def _merge_dict(self, config): config = config.get("job") if config.get("id") is not None: self.id = config["id"] cluster_configuration = config.get("cluster_configuration") if cluster_configuration: self.vm_size = cluster_configuration.get("vm_size") self.toolkit = Toolkit.from_dict( cluster_configuration.get("toolkit")) if cluster_configuration.get("size") is not None: self.max_dedicated_nodes = cluster_configuration.get("size") if cluster_configuration.get("size_low_priority") is not None: self.max_low_pri_nodes = cluster_configuration.get( "size_low_priority") self.subnet_id = cluster_configuration.get("subnet_id") self.worker_on_master = cluster_configuration.get( "worker_on_master") scheduling_target = cluster_configuration.get("scheduling_target") if scheduling_target: self.scheduling_target = SchedulingTarget(scheduling_target) applications = config.get("applications") if applications: self.applications = [] for application in applications: self.applications.append( aztk.spark.models.ApplicationConfiguration( name=application.get("name"), application=application.get("application"), application_args=application.get("application_args"), main_class=application.get("main_class"), jars=application.get("jars"), py_files=application.get("py_files"), files=application.get("files"), driver_java_options=application.get( "driver_java_options"), driver_library_path=application.get( "driver_library_path"), driver_class_path=application.get("driver_class_path"), driver_memory=application.get("driver_memory"), executor_memory=application.get("executor_memory"), driver_cores=application.get("driver_cores"), executor_cores=application.get("executor_cores"), )) spark_configuration = config.get("spark_configuration") if spark_configuration: self.spark_defaults_conf = _convert_to_path( spark_configuration.get("spark_defaults_conf")) self.spark_env_sh = _convert_to_path( spark_configuration.get("spark_env_sh")) self.core_site_xml = _convert_to_path( 
spark_configuration.get("core_site_xml")) self.jars = [ _convert_to_path(jar) for jar in spark_configuration.get("jars") or [] ]
def cluster_config_from_dict(config: dict):
    """Build a ClusterConfiguration (plus the ``wait`` flag) from a parsed dict.

    Args:
        config: dict parsed from a cluster configuration file. Recognized
            keys: id, vm_size, size, size_low_pri, subnet_id, username,
            password, custom_scripts, azure_files, toolkit, plugins,
            worker_on_master, wait. Missing keys leave the corresponding
            attribute at its ClusterConfiguration() default.

    Returns:
        Tuple of (ClusterConfiguration, wait) where ``wait`` defaults to
        False when the key is absent.
    """
    output = ClusterConfiguration()
    wait = False
    if config.get('id') is not None:
        output.cluster_id = config['id']

    if config.get('vm_size') is not None:
        output.vm_size = config['vm_size']

    # NOTE: truthiness check, so an explicit size of 0 is ignored
    # (preserved from the original behavior).
    if config.get('size'):
        output.vm_count = config['size']

    if config.get('size_low_pri'):
        output.vm_low_pri_count = config['size_low_pri']

    if config.get('subnet_id') is not None:
        output.subnet_id = config['subnet_id']

    # Bug fix: the password assignment used to run unconditionally, so a
    # config carrying a password but no username dereferenced the still-None
    # user_configuration and raised AttributeError. A password is only
    # meaningful alongside a username, so it is applied inside this branch.
    if config.get('username') is not None:
        output.user_configuration = UserConfiguration(
            username=config['username'])
        if config.get('password') is not None:
            output.user_configuration.password = config['password']

    # `[None]` appears when the YAML lists a single empty entry; treat it
    # the same as an absent key.
    if config.get('custom_scripts') not in [[None], None]:
        output.custom_scripts = []
        for custom_script in config['custom_scripts']:
            output.custom_scripts.append(
                aztk.spark.models.CustomScript(
                    script=custom_script['script'],
                    run_on=custom_script['runOn']))

    if config.get('azure_files') not in [[None], None]:
        output.file_shares = []
        for file_share in config['azure_files']:
            output.file_shares.append(
                aztk.spark.models.FileShare(
                    storage_account_name=file_share['storage_account_name'],
                    storage_account_key=file_share['storage_account_key'],
                    file_share_path=file_share['file_share_path'],
                    mount_path=file_share['mount_path'],
                ))

    if config.get('toolkit') is not None:
        output.toolkit = Toolkit.from_dict(config['toolkit'])

    if config.get('plugins') not in [[None], None]:
        output.plugins = []
        for plugin in config['plugins']:
            ref = PluginReference.from_dict(plugin)
            output.plugins.append(ref.get_plugin())

    if config.get('worker_on_master') is not None:
        output.worker_on_master = config['worker_on_master']

    if config.get('wait') is not None:
        wait = config['wait']

    return output, wait