Example #1
@classmethod
def setUpClass(cls):
    cls._conf = unit_test_utils.get_default_spark_conf(
        cls._spark_options_from_params)
    cls._spark = SparkSession.builder.config(conf=cls._conf).getOrCreate()
    cls._hc = H2OContext.getOrCreate(
        cls._spark,
        H2OConf(cls._spark).set_cluster_size(1))
Example #2
@classmethod
def setUpClass(cls):
    cls._spark = SparkSession.builder.config(
        conf=unit_test_utils.get_default_spark_conf()).getOrCreate()
    unit_test_utils.set_up_class(cls)
    cls._hc = H2OContext.getOrCreate(
        cls._spark,
        H2OConf(cls._spark).set_num_of_external_h2o_nodes(2))
Example #3
def createH2OConf():
    conf = H2OConf()
    conf.setClusterSize(1)
    conf.useAutoClusterStart()
    conf.setExternalClusterMode()
    conf.setLogLevel("INFO")
    return conf
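This helper builds a configuration with the newer no-argument H2OConf API. A minimal usage sketch, assuming the createH2OConf helper above, a local Spark install, and that the newer getOrCreate picks up the active SparkSession implicitly (as the fixture in Example #18 below suggests):

from pyspark.sql import SparkSession
from pysparkling import H2OContext

# Start (or reuse) a local Spark session; the newer getOrCreate no longer
# takes it as an explicit argument.
spark = SparkSession.builder.master("local[*]").getOrCreate()
hc = H2OContext.getOrCreate(createH2OConf())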
Example #4
@classmethod
def setUpClass(cls):
    cls._conf = unit_test_utils.get_default_spark_conf(
        cls._spark_options_from_params)
    cls._spark = SparkSession.builder.config(conf=cls._conf).getOrCreate()
    cls._hc = H2OContext.getOrCreate(
        cls._spark,
        H2OConf(cls._spark).set_num_of_external_h2o_nodes(1))
Example #5
def createH2OConf():
    conf = H2OConf()
    conf.setClusterSize(1)
    conf.set("spark.ext.h2o.rest.api.based.client", "true")
    conf.useAutoClusterStart()
    conf.setExternalClusterMode()
    return conf
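Example #5 sets the raw spark.ext.h2o.rest.api.based.client property alongside the typed setters. As the getOrCreate docstrings further down this page note, spark.ext.h2o.* properties can also come in through the Spark configuration itself; a sketch of that alternative (the property value is taken from the example above):

from pyspark import SparkConf

# Supply the same flag via the Spark conf instead of H2OConf; Sparkling Water
# falls back to spark.ext.h2o.* properties from the Spark configuration when
# no conf object is passed to H2OContext.getOrCreate.
spark_conf = SparkConf().set("spark.ext.h2o.rest.api.based.client", "true")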
Example #6
@classmethod
def setUpClass(cls):
    cls._spark = SparkSession.builder.config(
        conf=test_utils.get_default_spark_conf().set(
            "spark.ext.h2o.cloud.name", "test-cloud")).getOrCreate()
    test_utils.set_up_class(cls)
    h2o_conf = H2OConf(cls._spark).set_num_of_external_h2o_nodes(2)
    cls._hc = H2OContext.getOrCreate(cls._spark, h2o_conf)
Example #7
    @staticmethod
    def getOrCreate(spark_context, conf=None):
        """
        Get an existing H2OContext or create a new one based on the provided H2O
        configuration. If the conf parameter is set, its configuration is used;
        otherwise the configuration properties passed to Sparkling Water are used.
        Where values are missing, defaults apply in most cases. The default cluster
        mode is internal, i.e. spark.ext.h2o.external.cluster.mode=false.

        param - Spark Context
        returns - H2O Context
        """
        h2o_context = H2OContext(spark_context)

        jvm = h2o_context._jvm  # JVM
        jsc = h2o_context._jsc  # JavaSparkContext

        if conf is not None:
            selected_conf = conf
        else:
            selected_conf = H2OConf(spark_context)
        # Create H2OContext
        jhc = jvm.org.apache.spark.h2o.JavaH2OContext.getOrCreate(
            jsc, selected_conf._jconf)
        h2o_context._jhc = jhc
        h2o_context._conf = selected_conf
        h2o_context._client_ip = jhc.h2oLocalClientIp()
        h2o_context._client_port = jhc.h2oLocalClientPort()
        # Create H2O REST API client
        h2o.connect(ip=h2o_context._client_ip, port=h2o_context._client_port)
        h2o_context.is_initialized = True
        return h2o_context
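A call site for this SparkContext-based variant would mirror the test fixtures later on this page (Examples #16, #17, and #20); a minimal sketch, assuming a local master:

from pyspark import SparkContext
from pysparkling import H2OContext, H2OConf

sc = SparkContext(master="local[*]", appName="sparkling-water-example")
# Passing conf=None would fall back to H2OConf(sc) inside getOrCreate.
hc = H2OContext.getOrCreate(sc, H2OConf(sc))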
Example #8
def createH2OConf(spark):
    conf = H2OConf(spark)
    conf.set_cluster_size(1)
    conf.set("spark.ext.h2o.rest.api.based.client", "true")
    conf.use_auto_cluster_start()
    conf.set_external_cluster_mode()
    conf.set_h2o_node_web_enabled()
    return conf
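This variant takes the Spark session explicitly and uses the snake_case setter API. A call site might look like the following sketch (the helper name is from the example above; the fixture in Example #19 below uses the same pattern):

from pyspark.sql import SparkSession
from pysparkling import H2OContext

spark = SparkSession.builder.master("local[*]").getOrCreate()
hc = H2OContext.getOrCreate(spark, createH2OConf(spark))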
Example #9
@classmethod
def setUpClass(cls):
    cls._cloud_name = generic_test_utils.unique_cloud_name("h2o_conf_test")
    cls._spark = SparkSession.builder.config(
        conf=unit_test_utils.get_default_spark_conf().set(
            "spark.ext.h2o.cloud.name", cls._cloud_name)).getOrCreate()
    unit_test_utils.set_up_class(cls)
    h2o_conf = H2OConf(cls._spark).set_num_of_external_h2o_nodes(2)
    cls._hc = H2OContext.getOrCreate(cls._spark, h2o_conf)
Example #10
@classmethod
def setUpClass(cls):
    cls._cloud_name = generic_test_utils.unique_cloud_name("h2o_conf_test")
    cls._conf = unit_test_utils.get_default_spark_conf(
        cls._spark_options_from_params).set(
            "spark.ext.h2o.cloud.name", cls._cloud_name)
    cls._spark = SparkSession.builder.config(conf=cls._conf).getOrCreate()
    cls._hc = H2OContext.getOrCreate(
        cls._spark,
        H2OConf(cls._spark).set_cluster_size(1))
Example #11
@classmethod
def setUpClass(cls):
    cls._spark = SparkSession.builder.config(
        conf=unit_test_utils.get_default_spark_conf().setMaster(
            "yarn-client")).getOrCreate()
    unit_test_utils.set_up_class(cls)
    cls._hc = H2OContext.getOrCreate(
        cls._spark,
        H2OConf(cls._spark).set_cluster_size(1))
Example #12
@classmethod
def setUpClass(cls):
    cls._conf = unit_test_utils.get_default_spark_conf(
        cls._spark_options_from_params)
    cls._spark = SparkSession.builder.config(conf=cls._conf).getOrCreate()
    cls._hc = H2OContext.getOrCreate(
        cls._spark,
        H2OConf(cls._spark).set_cluster_size(1))
    cls.dataset = cls._spark.read.csv(
        "file://" +
        unit_test_utils.locate("smalldata/iris/iris_wheader.csv"),
        header=True,
        inferSchema=True)
Example #13
    @staticmethod
    def getOrCreate(spark, conf=None, verbose=True, **kwargs):
        """
        Get an existing H2OContext or create a new one based on the provided H2O
        configuration. If the conf parameter is set, its configuration is used;
        otherwise the configuration properties passed to Sparkling Water are used.
        Where values are missing, defaults apply in most cases. The default cluster
        mode is internal, i.e. spark.ext.h2o.external.cluster.mode=false.

        param - Spark Context or Spark Session
        returns - H2O Context
        """

        spark_session = spark
        if isinstance(spark, SparkContext):
            warnings.warn(
                "Method H2OContext.getOrCreate with an argument of type SparkContext is deprecated; "
                + "a parameter of type SparkSession is preferred.")
            spark_session = SparkSession.builder.getOrCreate()

        h2o_context = H2OContext(spark_session)

        jvm = h2o_context._jvm  # JVM
        jspark_session = h2o_context._jspark_session  # Java Spark Session

        if conf is not None:
            selected_conf = conf
        else:
            selected_conf = H2OConf(spark_session)
        # Create backing Java H2OContext
        jhc = jvm.org.apache.spark.h2o.JavaH2OContext.getOrCreate(
            jspark_session, selected_conf._jconf)
        h2o_context._jhc = jhc
        h2o_context._conf = selected_conf
        h2o_context._client_ip = jhc.h2oLocalClientIp()
        h2o_context._client_port = jhc.h2oLocalClientPort()
        # Create H2O REST API client
        h2o.connect(ip=h2o_context._client_ip,
                    port=h2o_context._client_port,
                    verbose=verbose,
                    **kwargs)
        h2o_context.is_initialized = True

        if verbose:
            print(h2o_context)

        # Stop H2O when running standalone pysparkling scripts, but only in client
        # deploy mode, so the user does not need to explicitly close H2O.
        # In cluster deploy mode the application would call exit, which the Spark AM
        # handles as a failure.
        deploy_mode = spark_session.sparkContext._conf.get(
            "spark.submit.deployMode")
        if deploy_mode != "cluster":
            atexit.register(lambda: h2o_context.__stop())
        return h2o_context
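Since this variant forwards **kwargs to h2o.connect and takes a SparkSession directly, a caller can tune the client connection without touching the H2OConf. A sketch, assuming only the parameters visible in the signature above:

from pyspark.sql import SparkSession
from pysparkling import H2OContext, H2OConf

spark = SparkSession.builder.master("local[*]").getOrCreate()
# verbose=False suppresses the printed context summary and is also passed on
# to h2o.connect along with any other keyword arguments.
hc = H2OContext.getOrCreate(spark, H2OConf(spark), verbose=False)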
Example #14
    @staticmethod
    def getOrCreate(spark_context, conf=None):
        """
        Get an existing H2OContext or create a new one based on the provided H2O
        configuration. If the conf parameter is set, its configuration is used;
        otherwise the configuration properties passed to Sparkling Water are used.
        Where values are missing, defaults apply in most cases. The default cluster
        mode is internal, i.e. spark.ext.h2o.external.cluster.mode=false.

        param - Spark Context
        returns - H2O Context
        """
        h2o_context = H2OContext(spark_context)

        jvm = h2o_context._jvm  # JVM
        gw = h2o_context._gw  # Py4J Gateway
        jsc = h2o_context._jsc  # JavaSparkContext

        # Import Sparkling Water into the current JVM view.
        # We cannot use Py4J directly to import Sparkling Water packages, i.e.
        #   java_import(sc._jvm, "org.apache.spark.h2o.*")
        # because of https://issues.apache.org/jira/browse/SPARK-5185,
        # so load the classes directly via the classloader instead.
        # This is finally fixed in Spark 2.0 (along with other related issues).

        # Call the corresponding getOrCreate method
        loader = jvm.java.lang.Thread.currentThread().getContextClassLoader()
        jhc_klazz = loader.loadClass("org.apache.spark.h2o.JavaH2OContext")
        conf_klazz = loader.loadClass("org.apache.spark.h2o.H2OConf")
        method_def = gw.new_array(jvm.Class, 2)
        method_def[0] = jsc.getClass()
        method_def[1] = conf_klazz
        method = jhc_klazz.getMethod("getOrCreate", method_def)
        method_params = gw.new_array(jvm.Object, 2)
        method_params[0] = jsc
        if conf is not None:
            selected_conf = conf
        else:
            selected_conf = H2OConf(spark_context)
        method_params[1] = selected_conf._jconf
        jhc = method.invoke(None, method_params)
        h2o_context._jhc = jhc
        h2o_context._conf = selected_conf
        h2o_context._client_ip = jhc.h2oLocalClientIp()
        h2o_context._client_port = jhc.h2oLocalClientPort()
        h2o.init(ip=h2o_context._client_ip,
                 port=h2o_context._client_port,
                 start_h2o=False,
                 strict_version_check=False)
        return h2o_context
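For contrast with the classloader dance above: on Spark 2.0+ the direct Py4J import that SPARK-5185 ruled out becomes viable again, per the comment in the code. A sketch of that one-liner (illustrative only):

from pyspark import SparkContext
from py4j.java_gateway import java_import

sc = SparkContext(master="local[*]", appName="sw-import-example")
# Import the Sparkling Water classes straight into the JVM view, which
# removes the need for the reflective getMethod/invoke calls above.
java_import(sc._jvm, "org.apache.spark.h2o.*")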
Example #15
    @staticmethod
    def getOrCreate(spark, conf=None, **kwargs):
        """
        Get an existing H2OContext or create a new one based on the provided H2O
        configuration. If the conf parameter is set, its configuration is used;
        otherwise the configuration properties passed to Sparkling Water are used.
        Where values are missing, defaults apply in most cases. The default cluster
        mode is internal, i.e. spark.ext.h2o.external.cluster.mode=false.

        param - Spark Context or Spark Session
        returns - H2O Context
        """

        spark_session = spark
        if isinstance(spark, SparkContext):
            warnings.warn(
                "Method H2OContext.getOrCreate with an argument of type SparkContext is deprecated; "
                + "a parameter of type SparkSession is preferred.")
            spark_session = SparkSession.builder.getOrCreate()

        h2o_context = H2OContext(spark_session)

        jvm = h2o_context._jvm  # JVM
        jsc = h2o_context._jsc  # JavaSparkContext

        if conf is not None:
            selected_conf = conf
        else:
            selected_conf = H2OConf(spark_session)
        # Create backing Java H2OContext
        jhc = jvm.org.apache.spark.h2o.JavaH2OContext.getOrCreate(
            jsc, selected_conf._jconf)
        h2o_context._jhc = jhc
        h2o_context._conf = selected_conf
        h2o_context._client_ip = jhc.h2oLocalClientIp()
        h2o_context._client_port = jhc.h2oLocalClientPort()
        # Create H2O REST API client
        h2o.connect(ip=h2o_context._client_ip,
                    port=h2o_context._client_port,
                    **kwargs)
        h2o_context.is_initialized = True
        # Stop H2O when running standalone pysparkling scripts, so the user does not
        # need to explicitly close H2O.
        atexit.register(lambda: h2o_context.stop_with_jvm())
        return h2o_context
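The atexit registration at the end is what distinguishes this variant from Example #13, which additionally skips the hook in cluster deploy mode. The underlying pattern is plain Python; a standalone sketch:

import atexit

def _shutdown():
    # Cleanup that should run when a standalone script exits normally;
    # this stands in for stopping the H2O cluster.
    print("shutting down")

atexit.register(_shutdown)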
Example #16
@classmethod
def setUpClass(cls):
    cls._sc = SparkContext(conf=test_utils.get_default_spark_conf())
    test_utils.set_up_class(cls)
    cls._hc = H2OContext.getOrCreate(
        cls._sc,
        H2OConf(cls._sc).set_num_of_external_h2o_nodes(2))
Example #17
@classmethod
def setUpClass(cls):
    cls._sc = SparkContext(conf=test_utils.get_default_spark_conf().set(
        "spark.ext.h2o.cloud.name", "test-cloud"))
    test_utils.set_up_class(cls)
    h2o_conf = H2OConf(cls._sc).set_num_of_external_h2o_nodes(2)
    cls._hc = H2OContext.getOrCreate(cls._sc, h2o_conf)
Example #18
def hc(spark):
    return H2OContext.getOrCreate(H2OConf().setClusterSize(1))
Example #19
def hc(spark):
    return H2OContext.getOrCreate(spark, H2OConf(spark).set_cluster_size(1))
Example #20
@classmethod
def setUpClass(cls):
    cls._sc = SparkContext(conf=test_utils.get_default_spark_conf())
    test_utils.set_up_class(cls)
    cls._hc = H2OContext.getOrCreate(cls._sc, H2OConf(cls._sc))