@classmethod
def setUpClass(cls):
    # a unique cloud name keeps parallel test runs from joining each other's clouds
    cls._cloud_name = generic_test_utils.unique_cloud_name("h2o_conf_test")
    cls._spark = SparkSession.builder.config(
        conf=unit_test_utils.get_default_spark_conf().set(
            "spark.ext.h2o.cloud.name", cls._cloud_name)).getOrCreate()
    unit_test_utils.set_up_class(cls)
    # external backend with two H2O nodes
    h2o_conf = H2OConf(cls._spark).set_num_of_external_h2o_nodes(2)
    cls._hc = H2OContext.getOrCreate(cls._spark, h2o_conf)
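Each setUpClass here allocates a Spark session and an H2O cloud, so the test class normally pairs it with a tearDownClass. A minimal cleanup sketch, assuming the _hc and _spark attributes set above and the standard stop() calls on H2OContext and SparkSession:

@classmethod
def tearDownClass(cls):
    # shut down the H2O cloud before the Spark session that hosts it
    cls._hc.stop()
    cls._spark.stop()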
@classmethod
def setUpClass(cls):
    cls._cloud_name = generic_test_utils.unique_cloud_name("h2o_conf_test")
    cls._conf = (unit_test_utils.get_default_spark_conf(cls._spark_options_from_params)
                 .set("spark.ext.h2o.cloud.name", cls._cloud_name))
    cls._spark = SparkSession.builder.config(conf=cls._conf).getOrCreate()
    # a single-node H2O cloud is enough for configuration tests
    cls._hc = H2OContext.getOrCreate(
        cls._spark,
        H2OConf(cls._spark).set_cluster_size(1))
Example #3
import subprocess  # used to invoke spark-submit below


def launch(test_env, script_name, param=None):
    # unique_cloud_name derives a unique cloud name from the script name
    # (dropping the ".py" suffix), so concurrent runs stay isolated
    cloud_name = generic_test_utils.unique_cloud_name(script_name)

    cmd_line = [get_submit_script(test_env.spark_home), "--verbose"]
    cmd_line.extend(["--master", test_env.spark_master])
    if "spark.driver.memory" in test_env.spark_conf:
        cmd_line.extend([
            "--driver-memory",
            test_env.spark_conf.get("spark.driver.memory")
        ])
    # remove ".py" from cloud name
    cmd_line.extend(
        ["--conf", 'spark.ext.h2o.cloud.name=sparkling-water-' + cloud_name])
    # -Dhdp.version is required on HDP clusters; subprocess.call() with a
    # list bypasses the shell, so no literal quotes around the value
    cmd_line.extend([
        "--conf",
        'spark.driver.extraJavaOptions=-Dhdp.version=' + test_env.hdp_version
    ])
    cmd_line.extend([
        "--conf",
        'spark.yarn.am.extraJavaOptions=-Dhdp.version=' + test_env.hdp_version
    ])
    cmd_line.extend(["--conf", 'spark.test.home=' + test_env.spark_home])
    cmd_line.extend(
        ["--conf", 'spark.scheduler.minRegisteredResourcesRatio=1'])
    cmd_line.extend(["--conf", 'spark.ext.h2o.repl.enabled=false'
                     ])  # disable repl in tests
    cmd_line.extend(["--conf", "spark.ext.h2o.external.start.mode=auto"])
    # Disable the YARN timeline service: it requires Jersey v1 libraries,
    # which are not available in Spark 2.0.
    # See: https://www.hackingnote.com/en/spark/trouble-shooting/NoClassDefFoundError-ClientConfig/
    cmd_line.extend(
        ["--conf", 'spark.hadoop.yarn.timeline-service.enabled=false'])
    cmd_line.extend(["--conf", 'spark.ext.h2o.hadoop.memory=2G'])
    cmd_line.extend(["--py-files", test_env.sdist])
    if generic_test_utils.tests_in_external_mode():
        # external backend: H2O runs as a separate cluster next to Spark
        cloud_ip = generic_test_utils.local_ip()
        test_env.conf("spark.ext.h2o.client.ip", cloud_ip)
        test_env.conf("spark.ext.h2o.backend.cluster.mode", "external")
        test_env.conf("spark.ext.h2o.external.cluster.num.h2o.nodes", "1")
    else:
        # internal backend: H2O nodes start inside the Spark executors
        test_env.conf("spark.ext.h2o.backend.cluster.mode", "internal")

    for k, v in test_env.spark_conf.items():
        cmd_line.extend(["--conf", k + '=' + str(v)])

    # Add python script
    cmd_line.append(script_name)

    if param is not None:
        cmd_line.append(param)

    # Launch it via command line
    return_code = subprocess.call(cmd_line)

    return return_code
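A hedged usage sketch: test_env stands for whatever environment object the harness builds, and the script name is a placeholder; launch returns spark-submit's exit code, so zero means the job succeeded.

return_code = launch(test_env, "pyunit_demo.py")  # hypothetical script name
assert return_code == 0, "spark-submit exited with a non-zero code"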
@classmethod
def setUpClass(cls):
    cls._cloud_name = generic_test_utils.unique_cloud_name("h2o_conf_test")
    cls._conf = (unit_test_utils.get_default_spark_conf(cls._spark_options_from_params)
                 .set("spark.ext.h2o.cloud.name", cls._cloud_name))
    cls._spark = SparkSession.builder.config(conf=cls._conf).getOrCreate()
    # external backend with a single H2O node
    cls._hc = H2OContext.getOrCreate(
        cls._spark, H2OConf(cls._spark).set_num_of_external_h2o_nodes(1))
@classmethod
def setUpClass(cls):
    cls._cloud_name = generic_test_utils.unique_cloud_name("h2o_mojo_predictions_test")
    cls._spark = SparkSession.builder.config(
        conf=unit_test_utils.get_default_spark_conf()).getOrCreate()
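Following the pattern of the earlier examples, a setup like this typically goes on to start the H2O cloud; a minimal sketch under that assumption:

    # hypothetical continuation, mirroring the examples above
    cls._hc = H2OContext.getOrCreate(cls._spark, H2OConf(cls._spark))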