def setUpClass(cls): cls._cloud_name = generic_test_utils.unique_cloud_name("h2o_conf_test") cls._spark = SparkSession.builder.config( conf=unit_test_utils.get_default_spark_conf().set( "spark.ext.h2o.cloud.name", cls._cloud_name)).getOrCreate() unit_test_utils.set_up_class(cls) h2o_conf = H2OConf(cls._spark).set_num_of_external_h2o_nodes(2) cls._hc = H2OContext.getOrCreate(cls._spark, h2o_conf)
def setUpClass(cls): cls._cloud_name = generic_test_utils.unique_cloud_name("h2o_conf_test") cls._conf = unit_test_utils.get_default_spark_conf(cls._spark_options_from_params). \ set("spark.ext.h2o.cloud.name", cls._cloud_name) cls._spark = SparkSession.builder.config(conf=cls._conf).getOrCreate() cls._hc = H2OContext.getOrCreate( cls._spark, H2OConf(cls._spark).set_cluster_size(1))
def launch(test_env, script_name, param=None):
    # Strip ".py" so the cloud name is derived from the bare script name.
    cloud_name = generic_test_utils.unique_cloud_name(script_name[:-3])
    cmd_line = [get_submit_script(test_env.spark_home), "--verbose"]
    cmd_line.extend(["--master", test_env.spark_master])
    if "spark.driver.memory" in test_env.spark_conf:
        cmd_line.extend(["--driver-memory", test_env.spark_conf.get("spark.driver.memory")])
    cmd_line.extend(["--conf", 'spark.ext.h2o.cloud.name=sparkling-water-' + cloud_name])
    # subprocess.call with an argument list does no shell splitting, so the
    # java options must not carry extra embedded quotes.
    cmd_line.extend(["--conf", 'spark.driver.extraJavaOptions=-Dhdp.version=' + test_env.hdp_version])
    cmd_line.extend(["--conf", 'spark.yarn.am.extraJavaOptions=-Dhdp.version=' + test_env.hdp_version])
    cmd_line.extend(["--conf", 'spark.test.home=' + test_env.spark_home])
    cmd_line.extend(["--conf", 'spark.scheduler.minRegisteredResourcesRatio=1'])
    cmd_line.extend(["--conf", 'spark.ext.h2o.repl.enabled=false'])  # disable repl in tests
    cmd_line.extend(["--conf", "spark.ext.h2o.external.start.mode=auto"])
    # Disable the timeline service, which requires Jersey v1 libraries that are
    # not available in Spark 2.0.
    # See: https://www.hackingnote.com/en/spark/trouble-shooting/NoClassDefFoundError-ClientConfig/
    cmd_line.extend(["--conf", 'spark.hadoop.yarn.timeline-service.enabled=false'])
    cmd_line.extend(["--conf", 'spark.ext.h2o.hadoop.memory=2G'])
    cmd_line.extend(["--py-files", test_env.sdist])

    if generic_test_utils.tests_in_external_mode():
        cloud_ip = generic_test_utils.local_ip()
        test_env.conf("spark.ext.h2o.client.ip", cloud_ip)
        test_env.conf("spark.ext.h2o.backend.cluster.mode", "external")
        test_env.conf("spark.ext.h2o.external.cluster.num.h2o.nodes", "1")
    else:
        test_env.conf("spark.ext.h2o.backend.cluster.mode", "internal")

    for k, v in test_env.spark_conf.items():
        cmd_line.extend(["--conf", k + '=' + str(v)])

    # Add the Python script and its optional parameter.
    cmd_line.append(script_name)
    if param is not None:
        cmd_line.append(param)

    # Launch spark-submit and return its exit code.
    return subprocess.call(cmd_line)
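# For context, a sketch of how launch() might be driven. The TestEnv class and
# every value below are hypothetical; the only contract is that the object
# carries the attributes launch() reads (spark_home, spark_master, spark_conf,
# hdp_version, sdist) plus the conf() setter it calls.
class TestEnv:
    def __init__(self, spark_home, spark_master, hdp_version, sdist):
        self.spark_home = spark_home
        self.spark_master = spark_master
        self.hdp_version = hdp_version
        self.sdist = sdist
        self.spark_conf = {}

    def conf(self, key, value):
        # launch() uses this to inject backend-specific Spark properties.
        self.spark_conf[key] = value


env = TestEnv(spark_home="/opt/spark", spark_master="yarn",
              hdp_version="2.6.5.0-292", sdist="dist/h2o_pysparkling.zip")
env.conf("spark.driver.memory", "2g")
assert launch(env, "ml_example.py") == 0  # non-zero means spark-submit failed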
def setUpClass(cls): cls._cloud_name = generic_test_utils.unique_cloud_name("h2o_conf_test") cls._conf = unit_test_utils.get_default_spark_conf(cls._spark_options_from_params). \ set("spark.ext.h2o.cloud.name", cls._cloud_name) cls._spark = SparkSession.builder.config(conf=cls._conf).getOrCreate() cls._hc = H2OContext.getOrCreate(cls._spark, H2OConf(cls._spark).set_num_of_external_h2o_nodes(1))
def setUpClass(cls): cls._cloud_name = generic_test_utils.unique_cloud_name("h2o_mojo_predictions_test") cls._spark = SparkSession.builder.config(conf = unit_test_utils.get_default_spark_conf()).getOrCreate()