import logging
import os
from typing import List, Tuple

# NOTE: Configuration comes from the package under test; its exact import path
# is omitted in this excerpt.


def test_hive_site() -> None:
    configuration = Configuration(
        "hive-site",
        {
            "hive.execution.engine": "tez",
            "hive.security.metastore.authorization.manager":
                "org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider",
        },
    )

    serialized_conf = configuration.serialized

    assert serialized_conf == (
        "  <property>\n"
        "    <name>hive.execution.engine</name>\n"
        "    <value>tez</value>\n"
        "  </property>\n"
        "  <property>\n"
        "    <name>hive.security.metastore.authorization.manager</name>\n"
        "    <value>org.apache.hadoop.hive.ql.security.authorization."
        "StorageBasedAuthorizationProvider</value>\n"
        "  </property>\n"
    )


# The two methods below are instance methods of the Spark configuration
# bootstrapper; the real enclosing class is not part of this excerpt, so a
# placeholder class name is used here to keep the module well-formed.
class SparkConfigBootstrapper:
    def get_regional_configs(self) -> List[Configuration]:
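        """Return region-specific core-site overrides.

        Only the China and GovCloud partitions get an explicit fs.s3a.endpoint
        (e.g. "s3.cn-north-1.amazonaws.com.cn" for cn-north-1); every other
        region returns an empty list and keeps the stock settings.
        """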
        aws_region = os.getenv("AWS_REGION")
        if aws_region is None:
            logging.warning(
                "Unable to detect AWS region from environment variable AWS_REGION"
            )
            return []
        elif aws_region in ["cn-northwest-1", "cn-north-1"]:
            aws_domain = "amazonaws.com.cn"
            s3_endpoint = f"s3.{aws_region}.{aws_domain}"
        elif aws_region in ["us-gov-west-1", "us-gov-east-1"]:
            aws_domain = "amazonaws.com"
            s3_endpoint = f"s3.{aws_region}.{aws_domain}"
        else:
            # no special regional configs needed
            return []

        return [
            Configuration(Classification="core-site",
                          Properties={"fs.s3a.endpoint": s3_endpoint})
        ]
    def get_yarn_spark_resource_config(
            self, instance_count: int, instance_mem_mb: int,
            instance_cores: int) -> Tuple[Configuration, Configuration]:
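        """Build yarn-site and spark-defaults settings from the instance shape.

        Sizing model, as implemented below: one executor per instance using all
        of its vcores, a 2 GiB driver, a 10% memory overhead for both driver
        and executors, and a default parallelism of two tasks per vcore.
        """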
        aws_region = os.getenv("AWS_REGION")
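        # Executors are sized to the whole instance, so the per-instance
        # executor count below always evaluates to 1.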
        executor_cores = instance_cores
        executor_count_per_instance = int(instance_cores / executor_cores)
        executor_count_total = instance_count * executor_count_per_instance
        default_parallelism = instance_count * instance_cores * 2

        # Let's leave 3% of the instance memory free
        instance_mem_mb = int(instance_mem_mb * 0.97)

        driver_mem_mb = 2 * 1024
        driver_mem_ovr_pct = 0.1
        driver_mem_ovr_mb = int(driver_mem_mb * driver_mem_ovr_pct)
        executor_mem_ovr_pct = 0.1
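        # Split the memory left after the driver (and its overhead) across this
        # instance's executors, reserving executor_mem_ovr_pct of each
        # executor's share for its own overhead:
        #   instance_mem - driver_mem - driver_ovr
        #       = executors_per_instance * executor_mem * (1 + overhead_pct)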
        executor_mem_mb = int(
            (instance_mem_mb - driver_mem_mb - driver_mem_ovr_mb) /
            (executor_count_per_instance +
             executor_count_per_instance * executor_mem_ovr_pct))
        executor_mem_ovr_mb = int(executor_mem_mb * executor_mem_ovr_pct)

        driver_gc_config = (
            "-XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=70 -XX:MaxHeapFreeRatio=70 "
            "-XX:+CMSClassUnloadingEnabled")
        driver_java_opts = f"-XX:OnOutOfMemoryError='kill -9 %p' {driver_gc_config}"

        executor_gc_config = (
            "-XX:+UseParallelGC -XX:InitiatingHeapOccupancyPercent=70 "
            f"-XX:ConcGCThreads={max(int(executor_cores / 4), 1)} "
            f"-XX:ParallelGCThreads={max(int(3 * executor_cores / 4), 1)} ")
        executor_java_opts = (
            "-verbose:gc -XX:OnOutOfMemoryError='kill -9 %p' "
            "-XX:+PrintGCDetails -XX:+PrintGCDateStamps "
            f"{executor_gc_config}")

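        # Expose the full (post-reservation) instance memory and vcores to
        # YARN, both as node resources and as the maximum single-container
        # allocation.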
        yarn_site_config = Configuration(
            "yarn-site",
            {
                "yarn.scheduler.minimum-allocation-mb": "1",
                "yarn.scheduler.maximum-allocation-mb": str(instance_mem_mb),
                "yarn.scheduler.minimum-allocation-vcores": "1",
                "yarn.scheduler.maximum-allocation-vcores":
                str(instance_cores),
                "yarn.nodemanager.resource.memory-mb": str(instance_mem_mb),
                "yarn.nodemanager.resource.cpu-vcores": str(instance_cores),
            },
        )

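        # Record the derived sizes, JVM options, and AWS region as Spark
        # defaults for the driver and executors.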
        spark_defaults_config = Configuration(
            "spark-defaults",
            {
                "spark.driver.memory": f"{driver_mem_mb}m",
                "spark.driver.memoryOverhead": f"{driver_mem_ovr_mb}m",
                "spark.driver.defaultJavaOptions": f"{driver_java_opts}",
                "spark.executor.memory": f"{executor_mem_mb}m",
                "spark.executor.memoryOverhead": f"{executor_mem_ovr_mb}m",
                "spark.executor.cores": f"{executor_cores}",
                "spark.executor.defaultJavaOptions": f"{executor_java_opts}",
                "spark.executor.instances": f"{executor_count_total}",
                "spark.default.parallelism": f"{default_parallelism}",
                "spark.yarn.appMasterEnv.AWS_REGION": f"{aws_region}",
                "spark.executorEnv.AWS_REGION": f"{aws_region}"
            },
        )

        return yarn_site_config, spark_defaults_config


def _write_conf(conf: Configuration) -> None:
    logging.info("Writing user config to {}".format(conf.path))
    conf_string = conf.write_config()
    logging.info("Configuration at {} is: \n{}".format(conf.path, conf_string))