def test_set_yarn_spark_resource_config_fallback(
    patched_virtual_memory,
    patched_cpu_count,
    patched_yarn_config,
    patched_spark_config,
    default_bootstrapper: Bootstrapper,
) -> None:
    """Fallback path: with no processing-job config and no instance-type info,
    the bootstrapper derives resources from psutil memory/CPU probes."""
    # psutil reports bytes; 123 MiB total so the config call should see 123.
    total_mem_prop = PropertyMock(return_value=123 * 1024 * 1024)
    type(patched_virtual_memory.return_value).total = total_mem_prop
    patched_cpu_count.return_value = 456

    bootstrapper = default_bootstrapper
    bootstrapper.load_processing_job_config = MagicMock(return_value=None)
    bootstrapper.load_instance_type_info = MagicMock(return_value=None)
    bootstrapper.get_yarn_spark_resource_config = MagicMock(
        return_value=(patched_yarn_config, patched_spark_config)
    )

    bootstrapper.set_yarn_spark_resource_config()

    # The psutil probes must each have been consulted exactly once.
    patched_virtual_memory.assert_called_once()
    total_mem_prop.assert_called_once()
    patched_cpu_count.assert_called_once()

    bootstrapper.load_processing_job_config.assert_called_once()
    bootstrapper.load_instance_type_info.assert_called_once()
    # Fallback assumes a single instance, with MiB memory and probed cores.
    bootstrapper.get_yarn_spark_resource_config.assert_called_once_with(1, 123, 456)
    patched_yarn_config.write_config.assert_called_once()
    patched_spark_config.write_config.assert_called_once()
def test_set_yarn_spark_resource_config(
    patched_yarn_config, patched_spark_config, default_bootstrapper: Bootstrapper
) -> None:
    """Primary path: instance count/type come from the processing-job config
    and the per-type memory/vCPU figures from the instance-type info table."""
    job_config = {
        "ProcessingResources": {
            "ClusterConfig": {"InstanceType": "foo.xbar", "InstanceCount": 123}
        }
    }
    type_info = {
        "foo.xbar": {
            "MemoryInfo": {"SizeInMiB": 456},
            "VCpuInfo": {"DefaultVCpus": 789},
        }
    }

    bootstrapper = default_bootstrapper
    bootstrapper.load_processing_job_config = MagicMock(return_value=job_config)
    bootstrapper.load_instance_type_info = MagicMock(return_value=type_info)
    bootstrapper.get_yarn_spark_resource_config = MagicMock(
        return_value=(patched_yarn_config, patched_spark_config)
    )

    bootstrapper.set_yarn_spark_resource_config()

    bootstrapper.load_processing_job_config.assert_called_once()
    bootstrapper.load_instance_type_info.assert_called_once()
    # Count from the job config; memory and vCPUs looked up by instance type.
    bootstrapper.get_yarn_spark_resource_config.assert_called_once_with(123, 456, 789)
    patched_yarn_config.write_config.assert_called_once()
    patched_spark_config.write_config.assert_called_once()
def test_get_yarn_spark_resource_config(
        default_bootstrapper: Bootstrapper) -> None:
    """Check the Yarn/Spark resource arithmetic for one m5.xlarge instance,
    then verify that 10x the instance count scales only executor count and
    default parallelism."""
    mem_mb = 16384
    cores = 4
    yarn_config, spark_config = default_bootstrapper.get_yarn_spark_resource_config(
        1, mem_mb, cores)

    # Yarn gets 97% of instance memory: int(16384 * .97) = int(15892.48)
    yarn_max_mem_mb = 15892

    expected_yarn_props = {
        "yarn.scheduler.minimum-allocation-mb": "1",
        "yarn.scheduler.maximum-allocation-mb": str(yarn_max_mem_mb),
        "yarn.scheduler.minimum-allocation-vcores": "1",
        "yarn.scheduler.maximum-allocation-vcores": str(cores),
        "yarn.nodemanager.resource.memory-mb": str(yarn_max_mem_mb),
        "yarn.nodemanager.resource.cpu-vcores": str(cores),
    }

    assert yarn_config.Classification == "yarn-site"
    assert yarn_config.Properties == expected_yarn_props

    executor_cores = 4          # one executor owns all instance cores
    executor_count_total = 1    # instance_count * executors_per_instance = 1 * 1
    default_parallelism = 8     # instance_count * cores * 2 = 1 * 4 * 2

    driver_mem_mb = 2048        # fixed 2 * 1024
    driver_mem_ovr_mb = 204     # int(2048 * 0.1) = int(204.8)
    # executor memory = int((yarn_max - driver_mem - driver_ovr) /
    #                       (executors_per_instance * (1 + overhead_pct)))
    #                 = int((15892 - 2048 - 204) / (1 + 1 * 0.1))
    #                 = int(13640 / 1.1)
    executor_mem_mb = 12399
    executor_mem_ovr_mb = 1239  # int(12399 * 0.1) = int(1239.9)

    driver_gc_config = (
        "-XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=70 -XX:MaxHeapFreeRatio=70 "
        "-XX:+CMSClassUnloadingEnabled")
    driver_java_opts = "-XX:OnOutOfMemoryError='kill -9 %p' " f"{driver_gc_config}"

    # ConcGCThreads = max(int(4 / 4), 1) = 1
    # ParallelGCThreads = max(int(3 * 4 / 4), 1) = 3
    executor_gc_config = (
        "-XX:+UseParallelGC -XX:InitiatingHeapOccupancyPercent=70 "
        "-XX:ConcGCThreads=1 "
        "-XX:ParallelGCThreads=3 ")
    executor_java_opts = (
        "-verbose:gc -XX:OnOutOfMemoryError='kill -9 %p' "
        "-XX:+PrintGCDetails -XX:+PrintGCDateStamps "
        f"{executor_gc_config}")

    expected_spark_props = {
        "spark.driver.memory": f"{driver_mem_mb}m",
        "spark.driver.memoryOverhead": f"{driver_mem_ovr_mb}m",
        "spark.driver.defaultJavaOptions": f"{driver_java_opts}",
        "spark.executor.memory": f"{executor_mem_mb}m",
        "spark.executor.memoryOverhead": f"{executor_mem_ovr_mb}m",
        "spark.executor.cores": f"{executor_cores}",
        "spark.executor.defaultJavaOptions": f"{executor_java_opts}",
        "spark.executor.instances": f"{executor_count_total}",
        "spark.default.parallelism": f"{default_parallelism}",
    }

    assert spark_config.Classification == "spark-defaults"
    assert spark_config.Properties == expected_spark_props

    # Same instance type, but a 10x larger cluster.
    yarn_config, spark_config = default_bootstrapper.get_yarn_spark_resource_config(
        10, mem_mb, cores)

    # Yarn config is per-node, so it is unchanged.
    assert yarn_config.Properties == expected_yarn_props

    # Spark config differs only in executor instances and parallelism (10x).
    expected_spark_props["spark.executor.instances"] = f"{executor_count_total * 10}"
    expected_spark_props["spark.default.parallelism"] = f"{default_parallelism * 10}"
    assert spark_config.Properties == expected_spark_props