Exemple #1
0
 def _arg_to_expr(arg):
     if arg is not None and isinstance(arg, range): arg = list(arg)
     if arg is None:
         return "[]"  # empty list
     elif isinstance(arg, ExprNode):
         return arg._do_it(False)
     elif isinstance(arg, ASTId):
         return str(arg)
     elif isinstance(arg, bool):
         return "{}".format("TRUE" if arg else "FALSE")
     elif is_numeric(arg):
         return "{}".format("NaN" if math.isnan(arg) else arg)
     elif is_str(arg):
         return '"' + arg + '"'
     elif isinstance(arg, slice):
         return "[{}:{}]".format(
             0 if arg.start is None else arg.start, "NaN" if
             (arg.stop is None or math.isnan(arg.stop)) else
             (arg.stop) if arg.start is None else (arg.stop - arg.start))
     elif isinstance(arg, list):
         allstrs = all(is_str(elem) for elem in arg)
         if allstrs:
             return "[%s]" % " ".join('"%s"' % elem for elem in arg)
         else:
             return "[%s]" % " ".join(
                 "NaN" if i == 'NaN' or math.isnan(i) else str(i)
                 for i in arg)
     raise ValueError("Unexpected arg type: " + str(type(arg)) + " " +
                      str(arg.__class__) + " " + arg.__repr__())
Exemple #2
0
 def _arg_to_expr(arg):
     if arg is not None and isinstance(arg, range): arg = list(arg)
     if arg is None:
         return "[]"  # empty list
     elif isinstance(arg, ExprNode):
         return arg._do_it(False)
     elif isinstance(arg, ASTId):
         return str(arg)
     elif isinstance(arg, bool):
         return "{}".format("TRUE" if arg else "FALSE")
     elif is_numeric(arg):
         return "{}".format("NaN" if math.isnan(arg) else arg)
     elif is_str(arg):
         return '"' + arg + '"'
     elif isinstance(arg, slice):
         return "[{}:{}]".format(0 if arg.start is None else arg.start,
                                 "NaN" if (arg.stop is None or math.isnan(arg.stop)) else (
                                 arg.stop) if arg.start is None else (arg.stop - arg.start))
     elif isinstance(arg, list):
         allstrs = all(is_str(elem) for elem in arg)
         if allstrs:
             return "[%s]" % " ".join('"%s"' % elem for elem in arg)
         else:
             return "[%s]" % " ".join("NaN" if i == 'NaN' or math.isnan(i) else str(i) for i in arg)
     raise ValueError("Unexpected arg type: " + str(type(arg)) + " " + str(arg.__class__) + " " + arg.__repr__())
Exemple #3
0
    def __init__(self, fr, by):
        """
        Set up a group-by over frame ``fr`` keyed on ``by``.

        ``by`` may be a single column name, a list-like of column names and/or other values, or any other
        single value. Column names are translated to their position in ``fr.names``; everything else is
        stored as given. ``self._by`` always ends up as a list.
        """
        self._fr = fr  # IN
        self._by = by  # IN
        self._aggs = {}  # IN
        self._res = None  # OUT

        if is_str(by):
            group_cols = [self._fr.names.index(by)]
        elif is_listlike(by):
            group_cols = []
            for entry in by:
                # Only names need translating; non-string entries pass through untouched.
                group_cols.append(self._fr.names.index(entry) if is_str(entry) else entry)
        else:
            group_cols = [self._by]
        self._by = group_cols
Exemple #4
0
def remove(x):
    """Remove object(s) from H2O.

    Parameters
    ----------
      x : H2OFrame, H2OEstimator, or string, or a list/tuple of those things.
        The object(s) or unique id(s) pointing to the object(s) to be removed.

    Raises
    ------
      ValueError
        If an item is None, or is not an H2OFrame, H2OEstimator, or string.
    """
    if not isinstance(x, (list, tuple)): x = (x,)
    for xi in x:
        if xi is None:
            raise ValueError("h2o.remove with no object is not supported, for your protection")
        if isinstance(xi, H2OFrame):
            xi_id = xi._ex._cache._id  # String or None
            if xi_id is None:
                # Lazy frame, never evaluated, nothing in cluster. Skip only this item: the remaining
                # entries of a list/tuple still have to be removed.
                continue
            rapids("(rm {})".format(xi_id))
            xi._ex = None
        elif isinstance(xi, H2OEstimator):
            api("DELETE /3/DKV/%s" % xi.model_id)
            xi._id = None
        elif is_str(xi):
            # string may be a Frame key name part of a rapids session... need to call rm thru rapids here
            try:
                rapids("(rm {})".format(xi))
            except Exception:
                # Best-effort fallback to a direct DKV delete. `Exception` (rather than a bare except)
                # lets KeyboardInterrupt / SystemExit propagate.
                api("DELETE /3/DKV/%s" % xi)
        else:
            raise ValueError('input to h2o.remove must one of: H2OFrame, H2OEstimator, or string')
def _handle_python_dicts(python_obj):
    header = list(python_obj.keys())
    is_valid = all([
        re.match(r'^[a-zA-Z_][a-zA-Z0-9_.]*$', col) for col in header
    ])  # is this a valid header?
    if not is_valid:
        raise ValueError(
            "Did not get a valid set of column names! Must match the regular expression: ^[a-zA-Z_][a-zA-Z0-9_.]*$ "
        )
    for k in python_obj:  # check that each value entry is a flat list/tuple or single int, float, or string
        v = python_obj[k]
        if isinstance(
                v,
            (tuple, list)):  # if value is a tuple/list, then it must be flat
            if _is_list_of_lists(v):
                raise ValueError("Values in the dictionary must be flattened!")
        elif is_numeric(v) or is_str(v):
            python_obj[k] = [v]
        else:
            raise ValueError(
                "Encountered invalid dictionary value when constructing H2OFrame. Got: {0}"
                .format(v))

    rows = list(map(list, itertools.zip_longest(*list(python_obj.values()))))
    data_to_write = [dict(list(zip(header, row))) for row in rows]
    return header, data_to_write
Exemple #6
0
 def __new__(cls, keyvals):
     """
     Create the object that represents one decoded JSON object.

     This method is called by the simplejson.json(object_pairs_hook=<this>).
     `keyvals` is a list of (key,value) tuples. For example:
         [("schema_version", 3), ("schema_name", "InitIDV3"), ("schema_type", "Iced")]

     The schema name is taken from the first pair that is either ``("__meta", <dict>)`` (its
     "schema_name" entry) or ``("__schema", <string>)``. Known schema names are handed to their
     dedicated class; anything else becomes a plain instance created by the parent ``__new__``.
     """
     schema = None
     for key, value in keyvals:
         from_meta = key == "__meta" and isinstance(value, dict)
         if from_meta or (key == "__schema" and is_str(value)):
             schema = value["schema_name"] if from_meta else value
             break

     if schema == "CloudV3":
         return H2OCluster(keyvals)
     elif schema == "H2OErrorV3":
         return H2OErrorV3(keyvals)
     elif schema == "H2OModelBuilderErrorV3":
         return H2OModelBuilderErrorV3(keyvals)
     elif schema == "TwoDimTableV3":
         return H2OTwoDimTable.make(keyvals)
     elif schema == "ModelMetricsRegressionV3":
         return H2ORegressionModelMetrics.make(keyvals)
     elif schema == "ModelMetricsClusteringV3":
         return H2OClusteringModelMetrics.make(keyvals)
     elif schema == "ModelMetricsBinomialV3":
         return H2OBinomialModelMetrics.make(keyvals)
     elif schema == "ModelMetricsMultinomialV3":
         return H2OMultinomialModelMetrics.make(keyvals)
     elif schema == "ModelMetricsAutoEncoderV3":
         return H2OAutoEncoderModelMetrics.make(keyvals)
     # Unknown or missing schema: fall back to the generic response object.
     return super(H2OResponse, cls).__new__(cls, keyvals)
Exemple #7
0
def remove(x):
    """Remove object(s) from H2O.

    Parameters
    ----------
      x : H2OFrame, H2OEstimator, or string, or a list/tuple of those things.
        The object(s) or unique id(s) pointing to the object(s) to be removed.

    Raises
    ------
      ValueError
        If an item is None, or is not an H2OFrame, H2OEstimator, or string.
    """
    if not isinstance(x, (list, tuple)): x = (x, )
    for xi in x:
        if xi is None:
            raise ValueError(
                "h2o.remove with no object is not supported, for your protection"
            )
        if isinstance(xi, H2OFrame):
            xi_id = xi._ex._cache._id  # String or None
            if xi_id is None:
                # Lazy frame, never evaluated, nothing in cluster. Skip only this item: the remaining
                # entries of a list/tuple still have to be removed.
                continue
            rapids("(rm {})".format(xi_id))
            xi._ex = None
        elif isinstance(xi, H2OEstimator):
            api("DELETE /3/DKV/%s" % xi.model_id)
            xi._id = None
        elif is_str(xi):
            # string may be a Frame key name part of a rapids session... need to call rm thru rapids here
            try:
                rapids("(rm {})".format(xi))
            except Exception:
                # Best-effort fallback to a direct DKV delete. `Exception` (rather than a bare except)
                # lets KeyboardInterrupt / SystemExit propagate.
                api("DELETE /3/DKV/%s" % xi)
        else:
            raise ValueError(
                'input to h2o.remove must one of: H2OFrame, H2OEstimator, or string'
            )
Exemple #8
0
    def _get_type_name(types):
        """
        Build a human-readable name for a collection of type specifications.

        Each element of ``types`` is named on its own and the names are joined with "|". A ``None`` element
        is not named; instead it marks the result as optional, which prefixes the first name with "?"
        (or yields "None" when there is nothing else). Expected results:

            _get_type_name([int]) == "integer"
            _get_type_name([str]) == "string"
            _get_type_name([tuple]) == "tuple"
            _get_type_name([Exception]) == "Exception"
            _get_type_name((int, float, bool)) == "integer|float|bool"
            _get_type_name((H2OFrame, None)) == "?H2OFrame"

        :raises RuntimeError: if an element is not a recognised kind of type specification.
        """
        from h2o.utils.typechecks import is_str, is_int, U, I, numeric
        optional = False
        names = []
        for spec in types:
            if spec is None:
                optional = True
                continue
            # The order of the tests below matters and mirrors the original chain.
            if spec is str:
                piece = "string"
            elif spec is int:
                piece = "integer"
            elif spec is numeric:
                piece = "numeric"
            elif is_str(spec):
                # literal string value: shown in double quotes, using its repr without the outer quotes
                piece = '"%s"' % repr(spec)[1:-1]
            elif is_int(spec):
                piece = str(spec)
            elif isinstance(spec, U):
                piece = H2OTypeError._get_type_name(spec)
            elif isinstance(spec, I):
                piece = "&".join(
                    H2OTypeError._get_type_name([member]) for member in spec)
            elif isinstance(spec, type):
                piece = spec.__name__
            elif isinstance(spec, list):
                piece = "list(%s)" % H2OTypeError._get_type_name(spec)
            elif isinstance(spec, set):
                piece = "set(%s)" % H2OTypeError._get_type_name(spec)
            elif isinstance(spec, tuple):
                piece = "(%s)" % ", ".join(
                    H2OTypeError._get_type_name([member]) for member in spec)
            elif isinstance(spec, dict):
                pairs = ("%s: %s" % (H2OTypeError._get_type_name([key_spec]),
                                     H2OTypeError._get_type_name([val_spec]))
                         for key_spec, val_spec in spec.items())
                piece = "dict(%s)" % ", ".join(pairs)
            else:
                raise RuntimeError("Unexpected `tt`: %r" % spec)
            names.append(piece)

        if optional and not names:
            return "None"
        if optional:
            names[0] = "?" + names[0]
        return "|".join(names)
Exemple #9
0
    def _log_message(self, msg):
        """
        Write `msg` to the current logging destination, `self._logging_dest`.

        A string destination is treated as a file name: the file is opened for appending (utf-8), the message
        is written, and the file is closed again right away. Any other destination is used as an already open
        file handle: the message is written to it and the handle is left open.
        """
        if not is_str(self._logging_dest):
            # Handle owned by the caller: write only, never close.
            self._logging_dest.write(msg)
            return
        with open(self._logging_dest, "at", encoding="utf-8") as logfile:
            logfile.write(msg)
Exemple #10
0
    def _log_message(self, msg):
        """
        Write `msg` to the current logging destination, `self._logging_dest`.

        A string destination is treated as a file name: the file is opened for appending (utf-8), the message
        is written, and the file is closed again right away. Any other destination is used as an already open
        file handle: the message is written to it and the handle is left open.
        """
        if not is_str(self._logging_dest):
            # Handle owned by the caller: write only, never close.
            self._logging_dest.write(msg)
            return
        with open(self._logging_dest, "at", encoding="utf-8") as logfile:
            logfile.write(msg)
Exemple #11
0
    def _get_type_name(types):
        """
        Build a human-readable name for a collection of type specifications.

        Each element of ``types`` is named on its own and the names are joined with "|". A ``None`` element
        is not named; instead it marks the result as optional, which prefixes the first name with "?"
        (or yields "None" when there is nothing else). Expected results:

            _get_type_name([int]) == "integer"
            _get_type_name([str]) == "string"
            _get_type_name([tuple]) == "tuple"
            _get_type_name([Exception]) == "Exception"
            _get_type_name((int, float, bool)) == "integer|float|bool"
            _get_type_name((H2OFrame, None)) == "?H2OFrame"

        :raises RuntimeError: if an element is not a recognised kind of type specification.
        """
        from h2o.utils.typechecks import is_str, is_int, U, I, numeric
        optional = False
        names = []
        for spec in types:
            if spec is None:
                optional = True
                continue
            # The order of the tests below matters and mirrors the original chain.
            if spec is str:
                piece = "string"
            elif spec is int:
                piece = "integer"
            elif spec is numeric:
                piece = "numeric"
            elif is_str(spec):
                # literal string value: shown in double quotes, using its repr without the outer quotes
                piece = '"%s"' % repr(spec)[1:-1]
            elif is_int(spec):
                piece = str(spec)
            elif isinstance(spec, U):
                piece = H2OTypeError._get_type_name(spec)
            elif isinstance(spec, I):
                piece = "&".join(H2OTypeError._get_type_name([member]) for member in spec)
            elif isinstance(spec, type):
                piece = spec.__name__
            elif isinstance(spec, list):
                piece = "list(%s)" % H2OTypeError._get_type_name(spec)
            elif isinstance(spec, set):
                piece = "set(%s)" % H2OTypeError._get_type_name(spec)
            elif isinstance(spec, tuple):
                piece = "(%s)" % ", ".join(H2OTypeError._get_type_name([member]) for member in spec)
            elif isinstance(spec, dict):
                pairs = (
                    "%s: %s" % (H2OTypeError._get_type_name([key_spec]), H2OTypeError._get_type_name([val_spec]))
                    for key_spec, val_spec in spec.items()
                )
                piece = "dict(%s)" % ", ".join(pairs)
            else:
                raise RuntimeError("Unexpected `tt`: %r" % spec)
            names.append(piece)

        if optional and not names:
            return "None"
        if optional:
            names[0] = "?" + names[0]
        return "|".join(names)
Exemple #12
0
    def start_logging(self, dest=None):
        """
        Turn on logging of API requests and remember where the log goes.

        :param dest: log destination: either a file name (str) or an open file handle. When omitted, the log
            goes to a file named "h2o-connection.log" inside a newly created temporary directory.
        :raises ValueError: if `dest` is neither a string nor an object of the same type as ``sys.stdout``.
        """
        if dest is None:
            log_dir = tempfile.mkdtemp()
            dest = os.path.join(log_dir, "h2o-connection.log")
        if not isinstance(dest, type(sys.stdout)) and not is_str(dest):
            raise ValueError("Logging destination should be either a string (filename), or an open file handle")
        self._print("Now logging all API requests to file %r" % dest)
        self._is_logging = True
        self._logging_dest = dest
 def __init__(self, widgets, title, file_mode):
     """
     Prepare the widgets that make up the progress bar.

     :param widgets: iterable of widgets; plain strings are wrapped into ``PBWString``. When empty or None,
         the default layout "<title>:", a bar and a percentage is used.
     :param title: text used for the default layout's leading label.
     :param file_mode: truthy to put every widget into "file" mode, otherwise "tty" mode.
     """
     super(ProgressBarWidget, self).__init__()
     self._file_mode = file_mode
     self._width = min(self._get_terminal_size(), 100)  # never wider than 100 columns
     self._encoding = (sys.stdout.encoding or "").lower()

     layout = widgets or [title + ":", PBWBar(), PBWPercentage()]
     prepared = []
     for item in layout:
         widget = PBWString(item) if is_str(item) else item
         widget.set_mode("file" if file_mode else "tty")
         widget.set_encoding(self._encoding)
         prepared.append(widget)

     self._widgets = tuple(prepared)
     self._widget_lengths = self._compute_widget_sizes()
     self._rendered = ""
Exemple #14
0
 def __init__(self, widgets, title, file_mode):
     """
     Prepare the widgets that make up the progress bar.

     :param widgets: iterable of widgets; plain strings are wrapped into ``PBWString``. When empty or None,
         the default layout "<title>:", a bar and a percentage is used.
     :param title: text used for the default layout's leading label.
     :param file_mode: truthy to put every widget into "file" mode, otherwise "tty" mode.
     """
     super(ProgressBarWidget, self).__init__()
     self._file_mode = file_mode
     self._width = min(self._get_terminal_size(), 100)  # never wider than 100 columns
     self._encoding = (sys.stdout.encoding or "").lower()

     layout = widgets or [title + ":", PBWBar(), PBWPercentage()]
     prepared = []
     for item in layout:
         widget = PBWString(item) if is_str(item) else item
         widget.set_mode("file" if file_mode else "tty")
         widget.set_encoding(self._encoding)
         prepared.append(widget)

     self._widgets = tuple(prepared)
     self._widget_lengths = self._compute_widget_sizes()
     self._rendered = ""
Exemple #15
0
 def _add_agg(self, op, col, na):
     """
     Register aggregation ``op`` for one or more columns and return ``self``.

     :param op: name of the aggregation; for "nrow" the column is forced to index 0.
     :param col: a column name, a column index, a list-like of those, or None meaning every column whose
         index is not in ``self._by``.
     :param na: stored alongside the aggregation as given.
     :raises ValueError: if ``col`` is none of the accepted kinds.
     """
     if op == "nrow":
         col = 0

     if col is None:
         # Fan out over all columns that are not group-by columns.
         for idx in range(self._fr.ncol):
             if idx in self._by:
                 continue
             self._add_agg(op, idx, na)
         return self

     if is_str(col):
         position = self._fr.names.index(col)
     elif is_int(col):
         position = col
     elif is_listlike(col):
         for member in col:
             self._add_agg(op, member, na)
         return self
     else:
         raise ValueError("col must be a column name or index.")

     # Aggregations are keyed by "<op>_<column name>".
     label = "{}_{}".format(op, self._fr.names[position])
     self._aggs[label] = [op, position, na]
     return self
Exemple #16
0
def _handle_python_dicts(python_obj):
    header = list(python_obj.keys())
    is_valid = all([re.match(r'^[a-zA-Z_][a-zA-Z0-9_.]*$', col) for col in header])  # is this a valid header?
    if not is_valid:
        raise ValueError(
            "Did not get a valid set of column names! Must match the regular expression: ^[a-zA-Z_][a-zA-Z0-9_.]*$ ")
    for k in python_obj:  # check that each value entry is a flat list/tuple or single int, float, or string
        v = python_obj[k]
        if isinstance(v, (tuple, list)):  # if value is a tuple/list, then it must be flat
            if _is_list_of_lists(v):
                raise ValueError("Values in the dictionary must be flattened!")
        elif is_numeric(v) or is_str(v):
            python_obj[k] = [v]
        else:
            raise ValueError("Encountered invalid dictionary value when constructing H2OFrame. Got: {0}".format(v))

    rows = list(map(list, itertools.zip_longest(*list(python_obj.values()))))
    data_to_write = [dict(list(zip(header, row))) for row in rows]
    return header, data_to_write
Exemple #17
0
 def __new__(cls, keyvals):
     """
     Create the object that represents one decoded JSON object.

     This method is called by the simplejson.json(object_pairs_hook=<this>).
     `keyvals` is a list of (key,value) tuples. For example:
         [("schema_version", 3), ("schema_name", "InitIDV3"), ("schema_type", "Iced")]

     The schema name is taken from the first pair that is either ``("__meta", <dict>)`` (its
     "schema_name" entry) or ``("__schema", <string>)``. Known schema names are handed to their
     dedicated class; anything else becomes a plain instance created by the parent ``__new__``.
     """
     schema = None
     for key, value in keyvals:
         from_meta = key == "__meta" and isinstance(value, dict)
         if from_meta or (key == "__schema" and is_str(value)):
             schema = value["schema_name"] if from_meta else value
             break

     if schema == "CloudV3":
         return H2OCluster(keyvals)
     elif schema == "H2OErrorV3":
         return H2OErrorV3(keyvals)
     elif schema == "H2OModelBuilderErrorV3":
         return H2OModelBuilderErrorV3(keyvals)
     elif schema == "TwoDimTableV3":
         return H2OTwoDimTable.make(keyvals)
     elif schema == "ModelMetricsRegressionV3":
         return H2ORegressionModelMetrics.make(keyvals)
     elif schema == "ModelMetricsClusteringV3":
         return H2OClusteringModelMetrics.make(keyvals)
     elif schema == "ModelMetricsBinomialV3":
         return H2OBinomialModelMetrics.make(keyvals)
     elif schema == "ModelMetricsMultinomialV3":
         return H2OMultinomialModelMetrics.make(keyvals)
     elif schema == "ModelMetricsAutoEncoderV3":
         return H2OAutoEncoderModelMetrics.make(keyvals)
     # Unknown or missing schema: fall back to the generic response object.
     return super(H2OResponse, cls).__new__(cls, keyvals)
Exemple #18
0
    def open(server=None, url=None, ip=None, port=None, https=None, auth=None, verify_ssl_certificates=True,
             proxy=None, cluster_name=None, verbose=True, _msgs=None):
        r"""
        Establish connection to an existing H2O server.

        The connection is not kept alive, so what this method actually does is it attempts to connect to the
        specified server, and checks that the server is healthy and responds to REST API requests. If the H2O server
        cannot be reached, an :class:`H2OConnectionError` will be raised. On success this method returns a new
        :class:`H2OConnection` object, and it is the only "official" way to create instances of this class.

        There are 3 ways to specify the target to connect to (these settings are mutually exclusive):

            * pass a ``server`` option,
            * pass the full ``url`` for the connection,
            * provide a triple of parameters ``ip``, ``port``, ``https``.

        :param H2OLocalServer server: connect to the specified local server instance. There is a slight difference
            between connecting to a local server by specifying its ip and address, and connecting through
            an H2OLocalServer instance: if the server becomes unresponsive, then having access to its process handle
            will allow us to query the server status through OS, and potentially provide snapshot of the server's
            error log in the exception information.
        :param url: full url of the server to connect to.
        :param ip: target server's IP address or hostname (default "localhost").
        :param port: H2O server's port (default 54321). A string consisting of digits only is converted to int.
        :param https: if True then connect using https instead of http (default False).
        :param verify_ssl_certificates: if False then SSL certificate checking will be disabled (default True). This
            setting should rarely be disabled, as it makes your connection vulnerable to man-in-the-middle attacks. When
            used, it will generate a warning from the requests library. Has no effect when ``https`` is False.
        :param auth: authentication token for connecting to the remote server. This can be either a
            (username, password) tuple, or an authenticator (AuthBase) object. Please refer to the documentation in
            the ``requests.auth`` module.
        :param proxy: url address of a proxy server. If you do not specify the proxy, then the requests module
            will attempt to use a proxy specified in the environment (in HTTP_PROXY / HTTPS_PROXY variables). We
            check for the presence of these variables and issue a warning if they are found. In order to suppress
            that warning and use proxy from the environment, pass ``proxy="(default)"``.
        :param cluster_name: name of the H2O cluster to connect to. This option is used from Steam only.
        :param verbose: if True, then connection progress info will be printed to the stdout.
        :param _msgs: custom messages to display during connection. This is a tuple (initial message, success message,
            failure message).

        :returns: A new :class:`H2OConnection` instance.
        :raises H2OConnectionError: if the server cannot be reached.
        :raises H2OServerError: if the server is in an unhealthy state (although this might be a recoverable error, the
            client itself should decide whether it wants to retry or not).
        """
        if server is not None:
            assert_is_type(server, H2OLocalServer)
            assert_is_type(ip, None, "`ip` should be None when `server` parameter is supplied")
            # The message must name the parameter that is actually being checked (`url`, not `ip`).
            assert_is_type(url, None, "`url` should be None when `server` parameter is supplied")
            if not server.is_running():
                raise H2OConnectionError("Unable to connect to server because it is not running")
            ip = server.ip
            port = server.port
            scheme = server.scheme
        elif url is not None:
            assert_is_type(url, str)
            assert_is_type(ip, None, "`ip` should be None when `url` parameter is supplied")
            # We don't allow any Unicode characters in the URL. Maybe some day we will...
            match = assert_matches(url, r"^(https?)://((?:[\w-]+\.)*[\w-]+):(\d+)/?$")
            scheme = match.group(1)
            ip = match.group(2)
            port = int(match.group(3))
        else:
            if ip is None: ip = str("localhost")
            if port is None: port = 54321
            if https is None: https = False
            if is_str(port) and port.isdigit(): port = int(port)
            assert_is_type(ip, str)
            assert_is_type(port, int)
            assert_is_type(https, bool)
            assert_matches(ip, r"(?:[\w-]+\.)*[\w-]+")
            assert_satisfies(port, 1 <= port <= 65535)
            scheme = "https" if https else "http"

        if verify_ssl_certificates is None: verify_ssl_certificates = True
        assert_is_type(verify_ssl_certificates, bool)
        assert_is_type(proxy, str, None)
        assert_is_type(auth, AuthBase, (str, str), None)
        assert_is_type(cluster_name, str, None)
        assert_is_type(_msgs, None, (str, str, str))

        conn = H2OConnection()
        conn._verbose = bool(verbose)
        conn._local_server = server
        conn._base_url = "%s://%s:%d" % (scheme, ip, port)
        conn._verify_ssl_cert = bool(verify_ssl_certificates)
        conn._auth = auth
        conn._cluster_name = cluster_name
        conn._proxies = None
        if proxy and proxy != "(default)":
            conn._proxies = {scheme: proxy}
        elif not proxy:
            # Give user a warning if there are any "*_proxy" variables in the environment. [PUBDEV-2504]
            # To suppress the warning pass proxy = "(default)".
            for name in os.environ:
                if name.lower() == scheme + "_proxy":
                    warn("Proxy is defined in the environment: %s. "
                         "This may interfere with your H2O Connection." % os.environ[name])

        try:
            # A server we hold a handle to gets more retries than one we only know by address.
            retries = 20 if server else 5
            conn._stage = 1
            conn._timeout = 3.0
            conn._cluster_info = conn._test_connection(retries, messages=_msgs)
            # If a server is unable to respond within 1s, it should be considered a bug. However we disable this
            # setting for now, for no good reason other than to ignore all those bugs :(
            conn._timeout = None
            # This is a good one! On the surface it registers a callback to be invoked when the script is about
            # to finish, but it also has a side effect in that the reference to current connection will be held
            # by the ``atexit`` service till the end -- which means it will never be garbage-collected.
            atexit.register(lambda: conn.close())
        except Exception:
            # Reset _stage so that we know the connection was not initialized properly.
            conn._stage = 0
            raise
        return conn
Exemple #19
0
def _is_str_list(l):
    return isinstance(l, (tuple, list)) and all(is_str(i) for i in l)
Exemple #20
0
def parse_setup(raw_frames,
                destination_frame="",
                header=(-1, 0, 1),
                separator="",
                column_names=None,
                column_types=None,
                na_strings=None):
    """During parse setup, the H2O cluster will make several guesses about the attributes of
    the data. This method allows a user to perform corrective measures by updating the
    returning dictionary from this method. This dictionary is then fed into `parse_raw` to
    produce the H2OFrame instance.

    Parameters
    ----------
      raw_frames : str or collection
        A collection of imported file frames. A single string is treated as a collection
        with one element.

      destination_frame : str, optional
        The unique hex key assigned to the imported file. When given, any "%" and "&"
        characters in it are replaced with ".". When empty, the value returned by the
        backend is left untouched.

      header : int, optional
        -1 means the first line is data, 0 means guess, 1 means first line is header.
        The default (the tuple ``(-1, 0, 1)``) means "not specified": no header hint is
        sent to the backend.

      separator : str, optional
        The field separator character. Values on each line of the file are separated by this
         character. If separator = "", no separator is sent to the backend.

      column_names : list, optional
        A list of column names for the file.

      column_types : list or dict, optional
          A list of types or a dictionary of column names to types to specify whether columns
          should be forced to a certain type upon import parsing. If a list, the types for
          elements that are None will be guessed. The possible types a column may have are:
          "unknown" - this will force the column to be parsed as all NA
          "uuid"    - the values in the column must be true UUID or will be parsed as NA
          "string"  - force the column to be parsed as a string
          "numeric" - force the column to be parsed as numeric. H2O will handle the
          compression of the numeric data in the optimal manner.
          "enum"    - force the column to be parsed as a categorical column.
          "time"    - force the column to be parsed as a time column. H2O will attempt to
          parse the following list of date time formats
          date - "yyyy-MM-dd", "yyyy MM dd", "dd-MMM-yy", "dd MMM yy"
          time - "HH:mm:ss", "HH:mm:ss:SSS", "HH:mm:ss:SSSnnnnnn", "HH.mm.ss" "HH.mm.ss.SSS",
          "HH.mm.ss.SSSnnnnnn"
          Times can also contain "AM" or "PM".

      na_strings : list or dict, optional
        A list of strings, or a list of lists of strings (one list per column), or a
        dictionary of column names to strings which are to be interpreted as missing values.

    Returns
    -------
      A dictionary is returned containing all of the guesses made by the H2O back end.

    Raises
    ------
      ValueError
        If any of the arguments fails the validation performed below.
    """

    # The H2O backend only accepts things that are quoted
    if is_str(raw_frames): raw_frames = [raw_frames]

    # temporary dictionary just to pass the following information to the parser: header, separator
    kwargs = {}
    # set header
    if header != (-1, 0, 1):
        if header not in (-1, 0, 1):
            raise ValueError("header should be -1, 0, or 1")
        kwargs["check_header"] = header

    # set separator
    if separator:
        if not is_str(separator) or len(separator) != 1:
            raise ValueError(
                "separator should be a single character string; got %r" %
                separator)
        kwargs["separator"] = ord(separator)  # sent as the character's code point

    kwargs["source_frames"] = [quoted(frame_id) for frame_id in raw_frames]
    j = api("POST /3/ParseSetup", data=kwargs)
    if "warnings" in j and j["warnings"]:
        for w in j['warnings']:
            warnings.warn(w)
    # TODO: really should be url encoding...
    if destination_frame:
        j["destination_frame"] = destination_frame.replace("%", ".").replace(
            "&", ".")
    if column_names is not None:
        if not isinstance(column_names, list):
            raise ValueError("col_names should be a list")
        if len(column_names) != len(j["column_types"]):
            raise ValueError(
                "length of col_names should be equal to the number of columns")
        j["column_names"] = column_names
    if column_types is not None:
        guessed_types = j["column_types"]
        if isinstance(column_types, dict):
            # overwrite dictionary to ordered list of column types. if user didn't specify column type for all names,
            # use type provided by backend
            if j["column_names"] is None:  # no colnames discovered! (C1, C2, ...)
                j["column_names"] = gen_header(j["number_columns"])
            if not set(column_types.keys()).issubset(set(j["column_names"])):
                raise ValueError(
                    "names specified in col_types is not a subset of the column names"
                )
            column_types = [
                column_types[name] if name in column_types else guessed_types[idx]
                for idx, name in enumerate(j["column_names"])
            ]
        elif isinstance(column_types, list):
            if len(column_types) != len(guessed_types):
                raise ValueError(
                    "length of col_types should be equal to the number of columns"
                )
            # a falsy entry (e.g. None) means "keep the backend's guess" for that column
            column_types = [
                requested or guessed
                for requested, guessed in zip(column_types, guessed_types)
            ]
        else:  # not dictionary or list
            raise ValueError(
                "col_types should be a list of types or a dictionary of column names to types"
            )
        j["column_types"] = column_types
    if na_strings is not None:
        if isinstance(na_strings, dict):
            # overwrite dictionary to ordered list of lists of na_strings
            if not j["column_names"]:
                raise ValueError("column names should be specified")
            if not set(na_strings.keys()).issubset(set(j["column_names"])):
                raise ValueError(
                    "names specified in na_strings is not a subset of the column names"
                )
            j["na_strings"] = [[] for _ in j["column_names"]]
            for name, na in na_strings.items():
                idx = j["column_names"].index(name)
                if is_str(na): na = [na]
                j["na_strings"][idx].extend(quoted(n) for n in na)
        elif is_list_of_lists(na_strings):
            if len(na_strings) != len(j["column_types"]):
                raise ValueError(
                    "length of na_strings should be equal to the number of columns"
                )
            j["na_strings"] = [[quoted(na)
                                for na in col] if col is not None else []
                               for col in na_strings]
        elif isinstance(na_strings, list):
            # Same NA strings for every column; each column gets its own list object so that a later
            # in-place change to one column cannot leak into the others.
            quoted_nas = [quoted(na) for na in na_strings]
            j["na_strings"] = [list(quoted_nas) for _ in j["column_types"]]
        else:  # not a dictionary or list
            raise ValueError(
                "na_strings should be a list, a list of lists (one list per column), or a dictionary of column "
                "names to strings which are to be interpreted as missing values"
            )

    # quote column names and column types also when not specified by user
    if j["column_names"]:
        j["column_names"] = list(map(quoted, j["column_names"]))
    j["column_types"] = list(map(quoted, j["column_types"]))
    return j
Exemple #21
0
    def start(jar_path=None,
              nthreads=-1,
              enable_assertions=True,
              max_mem_size=None,
              min_mem_size=None,
              ice_root=None,
              port="54321+",
              verbose=True):
        """
        Start new H2O server on the local machine.

        :param jar_path: Path to the h2o.jar executable. If not given, then we will search for h2o.jar in the
            locations returned by `._jar_paths()`.
        :param nthreads: Number of threads in the thread pool. This should be related to the number of CPUs used.
            -1 means use all CPUs on the host. A positive integer (at most 4096) specifies the number of CPUs
            directly.
        :param enable_assertions: If True, pass `-ea` option to the JVM.
        :param max_mem_size: Maximum heap size (jvm option Xmx), in bytes. Must be at least ``1 << 25``.
        :param min_mem_size: Minimum heap size (jvm option Xms), in bytes. Must not exceed ``max_mem_size`` when
            both are given.
        :param ice_root: A directory where H2O stores its temporary files. Must be an existing directory. If not
            given, a fresh directory created by tempfile.mkdtemp() is used.
        :param port: Port where to start the new server. This could be either an integer, a string of digits
            (converted to an integer), or a string of the form "DDDDD+", indicating that the server should start
            looking for an open port starting from DDDDD and up. ``None`` is treated as "54321+".
        :param verbose: If True, then connection info will be printed to the stdout.

        :returns: a new H2OLocalServer instance
        :raises AssertionError: if any argument has an unexpected type or value.
        """
        assert jar_path is None or is_str(
            jar_path), "`jar_path` should be string, got %s" % type(jar_path)
        assert jar_path is None or jar_path.endswith("h2o.jar"), \
            "`jar_path` should be a path to an h2o.jar executable, got %s" % jar_path
        assert is_int(
            nthreads), "`nthreads` should be integer, got %s" % type(nthreads)
        assert nthreads == -1 or 1 <= nthreads <= 4096, "`nthreads` is out of bounds: %d" % nthreads
        assert isinstance(enable_assertions, bool), \
            "`enable_assertions` should be bool, got %s" % type(enable_assertions)
        assert max_mem_size is None or is_int(max_mem_size), \
            "`max_mem_size` should be integer, got %s" % type(max_mem_size)
        assert max_mem_size is None or max_mem_size >= 1 << 25, "`max_mem_size` too small: %d" % max_mem_size
        assert min_mem_size is None or is_int(min_mem_size), \
            "`min_mem_size` should be integer, got %s" % type(min_mem_size)
        assert min_mem_size is None or max_mem_size is None or min_mem_size <= max_mem_size, \
            "`min_mem_size`=%d is larger than the `max_mem_size`=%d" % (min_mem_size, max_mem_size)
        if ice_root:
            assert is_str(
                ice_root
            ), "`ice_root` should be string, got %r" % type(ice_root)
            assert os.path.isdir(
                ice_root), "`ice_root` is not a valid directory: %s" % ice_root
        if port is None: port = "54321+"
        # `baseport` stays None unless the "DDDDD+" (search upwards) form was requested.
        baseport = None
        if is_str(port):
            if port.isdigit():
                port = int(port)
            else:
                # `endswith` (rather than `port[-1]`) keeps an empty string from raising IndexError, so that it
                # is reported through the same assertion message as any other malformed port.
                assert port.endswith("+") and port[:-1].isdigit(), \
                    "`port` should be of the form 'DDDD+', where D is a digit. Got: %s" % port
                baseport = int(port[:-1])
                port = 0
        assert is_int(
            port), "`port` should be integer (or string). Got: %s" % type(port)

        hs = H2OLocalServer()
        hs._verbose = bool(verbose)
        hs._jar_path = hs._find_jar(jar_path)
        hs._ice_root = ice_root
        if not ice_root:
            # No directory supplied: create a temporary one and remember it in `_tempdir` as well.
            hs._ice_root = tempfile.mkdtemp()
            hs._tempdir = hs._ice_root

        if verbose: print("Attempting to start a local H2O server...")
        hs._launch_server(port=port,
                          baseport=baseport,
                          nthreads=int(nthreads),
                          ea=enable_assertions,
                          mmax=max_mem_size,
                          mmin=min_mem_size)
        if verbose:
            print("  Server is running at %s://%s:%d" %
                  (hs.scheme, hs.ip, hs.port))
        # Make sure the server is shut down when the interpreter exits.
        atexit.register(lambda: hs.shutdown())
        return hs
Exemple #22
0
    def open(server=None,
             url=None,
             ip=None,
             port=None,
             https=None,
             auth=None,
             verify_ssl_certificates=True,
             proxy=None,
             cluster_name=None,
             verbose=True):
        r"""
        Establish connection to an existing H2O server.

        The connection is not kept alive, so what this method actually does is it attempts to connect to the
        specified server, and checks that the server is healthy and responds to REST API requests. If the H2O server
        cannot be reached, an :class:`H2OConnectionError` will be raised. On success this method returns a new
        :class:`H2OConnection` object, and it is the only "official" way to create instances of this class.

        There are 3 ways to specify the target to connect to (these settings are mutually exclusive):

            * pass a ``server`` option,
            * pass the full ``url`` for the connection,
            * provide a triple of parameters ``ip``, ``port``, ``https``.

        :param H2OLocalServer server: connect to the specified local server instance. There is a slight difference
            between connecting to a local server by specifying its ip and port, and connecting through
            an H2OLocalServer instance: if the server becomes unresponsive, then having access to its process handle
            will allow us to query the server status through OS, and potentially provide snapshot of the server's
            error log in the exception information.
        :param url: full url of the server to connect to, of the form ``http(s)://host:port`` with an optional
            trailing slash.
        :param ip: target server's IP address or hostname (default "localhost").
        :param port: H2O server's port (default 54321). A string of digits is converted to an integer.
        :param https: if True then connect using https instead of http (default False).
        :param verify_ssl_certificates: if False then SSL certificate checking will be disabled (default True). This
            setting should rarely be disabled, as it makes your connection vulnerable to man-in-the-middle attacks. When
            used, it will generate a warning from the requests library. Has no effect when ``https`` is False.
        :param auth: authentication token for connecting to the remote server. This can be either a
            (username, password) tuple, or an authenticator (AuthBase) object. Please refer to the documentation in
            the ``requests.auth`` module.
        :param proxy: url address of a proxy server. If you do not specify the proxy, then the requests module
            will attempt to use a proxy specified in the environment (in HTTP_PROXY / HTTPS_PROXY variables). We
            check for the presence of these variables and issue a warning if they are found. In order to suppress
            that warning and use proxy from the environment, pass ``proxy="(default)"``.
        :param cluster_name: name of the H2O cluster to connect to. This option is used from Steam only.
        :param verbose: if True, then connection progress info will be printed to the stdout.

        :returns: A new :class:`H2OConnection` instance.
        :raises H2OConnectionError: if the server cannot be reached, or if ``server`` is given but not running.
        :raises H2OServerError: if the server is in an unhealthy state (although this might be a recoverable error, the
            client itself should decide whether it wants to retry or not).
        """
        if server is not None:
            assert_is_type(server, H2OLocalServer)
            assert_is_type(
                ip, None,
                "`ip` should be None when `server` parameter is supplied")
            # This check is about `url`; the message previously named `ip` by mistake.
            assert_is_type(
                url, None,
                "`url` should be None when `server` parameter is supplied")
            if not server.is_running():
                raise H2OConnectionError(
                    "Unable to connect to server because it is not running")
            ip = server.ip
            port = server.port
            scheme = server.scheme
        elif url is not None:
            assert_is_type(url, str)
            assert_is_type(
                ip, None,
                "`ip` should be None when `url` parameter is supplied")
            # We don't allow any Unicode characters in the URL. Maybe some day we will...
            match = assert_matches(
                url, r"^(https?)://((?:[\w-]+\.)*[\w-]+):(\d+)/?$")
            scheme = match.group(1)
            ip = match.group(2)
            port = int(match.group(3))
        else:
            # Neither `server` nor `url` given: fall back to the (ip, port, https) triple with defaults.
            if ip is None: ip = str("localhost")
            if port is None: port = 54321
            if https is None: https = False
            if is_str(port) and port.isdigit(): port = int(port)
            assert_is_type(ip, str)
            assert_is_type(port, int)
            assert_is_type(https, bool)
            assert_matches(ip, r"(?:[\w-]+\.)*[\w-]+")
            assert_satisfies(port, 1 <= port <= 65535)
            scheme = "https" if https else "http"

        if verify_ssl_certificates is None: verify_ssl_certificates = True
        assert_is_type(verify_ssl_certificates, bool)
        assert_is_type(proxy, str, None)
        assert_is_type(auth, AuthBase, (str, str), None)
        assert_is_type(cluster_name, str, None)

        conn = H2OConnection()
        conn._verbose = bool(verbose)
        conn._local_server = server
        conn._base_url = "%s://%s:%d" % (scheme, ip, port)
        conn._verify_ssl_cert = bool(verify_ssl_certificates)
        conn._auth = auth
        conn._cluster_name = cluster_name
        conn._proxies = None
        if proxy and proxy != "(default)":
            conn._proxies = {scheme: proxy}
        elif not proxy:
            # Give user a warning if there are any "*_proxy" variables in the environment. [PUBDEV-2504]
            # To suppress the warning pass proxy = "(default)".
            for name in os.environ:
                if name.lower() == scheme + "_proxy":
                    warn("Proxy is defined in the environment: %s. "
                         "This may interfere with your H2O Connection." %
                         os.environ[name])

        try:
            # Mark the connection as stage 1 before probing the server; per the original note, .request() will
            # otherwise complain that the connection is not initialized.
            retries = 20 if server else 5
            conn._stage = 1
            # A 3-second timeout is used only while testing the connection; it is reset to None right after.
            conn._timeout = 3.0
            conn._cluster_info = conn._test_connection(retries)
            # A server that cannot respond quickly should arguably be considered a bug. However the timeout is
            # dropped from here on, for no good reason other than to ignore all those bugs :(
            conn._timeout = None
            atexit.register(lambda: conn.close())
        except:
            # Deliberately catch everything: reset the stage so that we know the connection was not initialized
            # properly, then re-raise the original exception unchanged.
            conn._stage = 0
            raise
        return conn
def _is_str_list(l):
    """Return True when `l` is a list or tuple in which every element satisfies ``is_str``."""
    if not isinstance(l, (tuple, list)):
        return False
    for item in l:
        if not is_str(item):
            return False
    # Also reached for an empty list/tuple, which is accepted.
    return True
Exemple #24
0
    def open(server=None, url=None, ip=None, port=None, https=None, verify_ssl_certificates=True, auth=None,
             proxy=None, cluster_name=None, verbose=True):
        r"""
        Establish connection to an existing H2O server.

        The connection is not kept alive, so what this method actually does is it attempts to connect to the
        specified server, and checks that the server is healthy and responds to REST API requests. If the H2O server
        cannot be reached, an :class:`H2OConnectionError` will be raised. On success this method returns a new
        :class:`H2OConnection` object, and it is the only "official" way to create instances of this class.

        There are 3 ways to specify the target to connect to (these settings are mutually exclusive):

            * pass a ``server`` option,
            * pass the full ``url`` for the connection,
            * provide a triple of parameters ``ip``, ``port``, ``https``.

        :param H2OLocalServer server: connect to the specified local server instance. There is a slight difference
            between connecting to a local server by specifying its ip and port, and connecting through
            an H2OLocalServer instance: if the server becomes unresponsive, then having access to its process handle
            will allow us to query the server status through OS, and potentially provide snapshot of the server's
            error log in the exception information.
        :param url: full url of the server to connect to, of the form ``http(s)://host:port`` (trailing slashes
            are ignored).
        :param ip: target server's IP address or hostname (default "localhost").
        :param port: H2O server's port (default 54321). A string of digits is converted to an integer.
        :param https: if True then connect using https instead of http (default False).
        :param verify_ssl_certificates: if False then SSL certificate checking will be disabled (default True). This
            setting should rarely be disabled, as it makes your connection vulnerable to man-in-the-middle attacks. When
            used, it will generate a warning from the requests library. Has no effect when ``https`` is False.
        :param auth: authentication token for connecting to the remote server. This can be either a
            (username, password) tuple, or an authenticator (AuthBase) object. Please refer to the documentation in
            the ``requests.auth`` module.
        :param proxy: url address of a proxy server. If you do not specify the proxy, then the requests module
            will attempt to use a proxy specified in the environment (in HTTP_PROXY / HTTPS_PROXY variables). We
            check for the presence of these variables and issue a warning if they are found. In order to suppress
            that warning and use proxy from the environment, pass ``proxy="(default)"``.
        :param cluster_name: name of the H2O cluster to connect to. This option is used from Steam only.
        :param verbose: if True, then connection progress info will be printed to the stdout.

        :returns: A new :class:`H2OConnection` instance.
        :raises H2OConnectionError: if the server cannot be reached.
        :raises H2OServerError: if the server is in an unhealthy state (although this might be a recoverable error, the
            client itself should decide whether it wants to retry or not).
        :raises AssertionError: if the ``url``, ``port`` or ``auth`` arguments fail the inline validation below.
        """
        if server is not None:
            assert_is_type(server, H2OLocalServer)
            assert_is_none(ip, "when `server` parameter is given")
            assert_is_none(url, "when `server` parameter is given")
            ip = server.ip
            port = server.port
            scheme = server.scheme
        elif url is not None:
            assert_is_str(url)
            assert_is_none(ip, "when `url` parameter is given")
            # Expected shape after splitting on ":" is [scheme, "//host", port].
            parts = url.rstrip("/").split(":")
            # The middle part must really be "//" followed by a non-empty host. Without this check a malformed
            # url such as "http:localhost:54321" would pass and silently lose the first two characters of the
            # host name when the "//" prefix is stripped below.
            assert (len(parts) == 3 and (parts[0] in {"http", "https"}) and
                    parts[1].startswith("//") and len(parts[1]) > 2 and parts[2].isdigit()), \
                "Invalid URL parameter '%s'" % url
            scheme = parts[0]
            ip = parts[1][2:]
            port = int(parts[2])
        else:
            # Neither `server` nor `url` given: fall back to the (ip, port, https) triple with defaults.
            if ip is None: ip = str("localhost")
            if port is None: port = 54321
            if https is None: https = False
            if is_str(port) and port.isdigit(): port = int(port)
            assert_is_str(ip)
            assert_is_int(port)
            assert_is_bool(https)
            assert 1 <= port <= 65535, "Invalid `port` number: %d" % port
            scheme = "https" if https else "http"

        if verify_ssl_certificates is None: verify_ssl_certificates = True
        assert_is_bool(verify_ssl_certificates)
        assert_maybe_str(proxy)
        assert auth is None or isinstance(auth, tuple) and len(auth) == 2 or isinstance(auth, AuthBase), \
            "Invalid authentication token of type %s" % type(auth)
        assert_maybe_str(cluster_name)

        conn = H2OConnection()
        conn._verbose = bool(verbose)
        conn._local_server = server
        conn._base_url = "%s://%s:%d" % (scheme, ip, port)
        conn._verify_ssl_cert = bool(verify_ssl_certificates)
        conn._auth = auth
        conn._cluster_name = cluster_name
        conn._proxies = None
        if proxy and proxy != "(default)":
            conn._proxies = {scheme: proxy}
        elif not proxy:
            # Give user a warning if there are any "*_proxy" variables in the environment. [PUBDEV-2504]
            # To suppress the warning pass proxy = "(default)".
            for name in os.environ:
                if name.lower() == scheme + "_proxy":
                    warn("Proxy is defined in the environment: %s. "
                         "This may interfere with your H2O Connection." % os.environ[name])

        try:
            # Mark the connection as stage 1 before probing the server; per the original note, .request() will
            # otherwise complain that the connection is not initialized.
            retries = 20 if server else 5
            conn._stage = 1
            # A 3-second timeout is used only while testing the connection; it is reset to None right after.
            conn._timeout = 3.0
            conn._cluster_info = conn._test_connection(retries)
            # A server that cannot respond quickly should arguably be considered a bug. However the timeout is
            # dropped from here on, for no good reason other than to ignore all those bugs :(
            conn._timeout = None
            atexit.register(lambda: conn.close())
        except:
            # Deliberately catch everything: reset the stage so that we know the connection was not initialized
            # properly, then re-raise the original exception unchanged.
            conn._stage = 0
            raise
        return conn
Exemple #25
0
def parse_setup(raw_frames, destination_frame="", header=(-1, 0, 1), separator="", column_names=None,
                column_types=None, na_strings=None):
    """During parse setup, the H2O cluster will make several guesses about the attributes of
    the data. This method allows a user to perform corrective measures by updating the
    returning dictionary from this method. This dictionary is then fed into `parse_raw` to
    produce the H2OFrame instance.

    Parameters
    ----------
      raw_frames : str or list of str
        Key(s) of the imported raw file frame(s). A single string is treated as a
        one-element list.

      destination_frame : str, optional
        The key to assign to the parsed frame. Any "%" and "&" characters are replaced
        with ".". If empty, the value returned by the backend is left untouched.

      header : int, optional
        -1 means the first line is data, 0 means guess, 1 means first line is header.
        When left at its default, no header hint is sent to the backend.

      separator : str, optional
        The field separator character. Values on each line of the file are separated by this
        character. Must be a single character; if empty, no separator hint is sent and the
        parser will detect the separator.

      column_names : list, optional
        A list of column names for the file. Its length must equal the number of columns.

      column_types : list or dict, optional
          A list of types or a dictionary of column names to types to specify whether columns
          should be forced to a certain type upon import parsing. If a list, the types for
          elements that are None will be guessed. If a dict, columns that are not mentioned
          keep the type guessed by the backend. The possible types a column may have are:
          "unknown" - this will force the column to be parsed as all NA
          "uuid"    - the values in the column must be true UUID or will be parsed as NA
          "string"  - force the column to be parsed as a string
          "numeric" - force the column to be parsed as numeric. H2O will handle the
          compression of the numeric data in the optimal manner.
          "enum"    - force the column to be parsed as a categorical column.
          "time"    - force the column to be parsed as a time column. H2O will attempt to
          parse the following list of date time formats
          date - "yyyy-MM-dd", "yyyy MM dd", "dd-MMM-yy", "dd MMM yy"
          time - "HH:mm:ss", "HH:mm:ss:SSS", "HH:mm:ss:SSSnnnnnn", "HH.mm.ss" "HH.mm.ss.SSS",
          "HH.mm.ss.SSSnnnnnn"
          Times can also contain "AM" or "PM".

      na_strings : list or dict, optional
        A list of strings, or a list of lists of strings (one list per column), or a
        dictionary of column names to strings which are to be interpreted as missing values.

    Returns
    -------
      A dictionary is returned containing all of the guesses made by the H2O back end, with
      the user-supplied overrides applied and with column names / column types quoted.

    Raises
    ------
      ValueError
        If `header`, `separator`, `column_names`, `column_types` or `na_strings` has an
        unsupported type, length or value.
    """

    # The H2O backend only accepts things that are quoted
    if is_str(raw_frames): raw_frames = [raw_frames]

    # temporary dictionary just to pass the following information to the parser: header, separator
    kwargs = {}
    # set header (the default tuple acts as a "not specified" sentinel)
    if header != (-1, 0, 1):
        if header not in (-1, 0, 1): raise ValueError("header should be -1, 0, or 1")
        kwargs["check_header"] = header

    # set separator (sent to the backend as the character's code point)
    if separator:
        if not is_str(separator) or len(separator) != 1:
            raise ValueError("separator should be a single character string; got %r" % separator)
        kwargs["separator"] = ord(separator)

    kwargs["source_frames"] = [quoted(frame_id) for frame_id in raw_frames]
    j = api("POST /3/ParseSetup", data=kwargs)
    if "warnings" in j and j["warnings"]:
        for w in j["warnings"]:
            warnings.warn(w)
    # TODO: really should be url encoding...
    if destination_frame: j["destination_frame"] = destination_frame.replace("%", ".").replace("&", ".")

    if column_names is not None:
        if not isinstance(column_names, list): raise ValueError("col_names should be a list")
        if len(column_names) != len(j["column_types"]):
            raise ValueError("length of col_names should be equal to the number of columns")
        j["column_names"] = column_names

    if column_types is not None:
        if isinstance(column_types, dict):
            # overwrite dictionary to ordered list of column types. if user didn't specify column type for all names,
            # use type provided by backend
            if j["column_names"] is None:  # no colnames discovered! (C1, C2, ...)
                j["column_names"] = gen_header(j["number_columns"])
            if not set(column_types).issubset(j["column_names"]):
                raise ValueError("names specified in col_types is not a subset of the column names")
            column_types = [column_types[name] if name in column_types else j["column_types"][idx]
                            for idx, name in enumerate(j["column_names"])]
        elif isinstance(column_types, list):
            if len(column_types) != len(j["column_types"]):
                raise ValueError("length of col_types should be equal to the number of columns")
            # A falsy entry (e.g. None) means "keep the type guessed by the backend".
            column_types = [user_type if user_type else guessed_type
                            for user_type, guessed_type in zip(column_types, j["column_types"])]
        else:  # not dictionary or list
            raise ValueError("col_types should be a list of types or a dictionary of column names to types")
        j["column_types"] = column_types

    if na_strings is not None:
        if isinstance(na_strings, dict):
            # overwrite dictionary to ordered list of lists of na_strings
            if not j["column_names"]: raise ValueError("column names should be specified")
            if not set(na_strings).issubset(j["column_names"]):
                raise ValueError("names specified in na_strings is not a subset of the column names")
            j["na_strings"] = [[] for _ in range(len(j["column_names"]))]
            for name, na in na_strings.items():
                idx = j["column_names"].index(name)
                if is_str(na): na = [na]
                for n in na:
                    j["na_strings"][idx].append(quoted(n))
        elif is_list_of_lists(na_strings):
            if len(na_strings) != len(j["column_types"]):
                raise ValueError("length of na_strings should be equal to the number of columns")
            j["na_strings"] = [[quoted(na) for na in col] if col is not None else [] for col in na_strings]
        elif isinstance(na_strings, list):
            # The same NA strings apply to every column. Give each column its own list: `[[...]] * n` would
            # make all columns share one list object, so editing one column's entry in the returned
            # dictionary would silently change all of them.
            quoted_nas = [quoted(na) for na in na_strings]
            j["na_strings"] = [list(quoted_nas) for _ in range(len(j["column_types"]))]
        else:  # not a dictionary or list
            raise ValueError(
                "na_strings should be a list, a list of lists (one list per column), or a dictionary of column "
                "names to strings which are to be interpreted as missing values")

    # quote column names and column types also when not specified by user
    if j["column_names"]: j["column_names"] = list(map(quoted, j["column_names"]))
    j["column_types"] = list(map(quoted, j["column_types"]))
    return j
Exemple #26
0
    def start(jar_path=None, nthreads=-1, enable_assertions=True, max_mem_size=None, min_mem_size=None,
              ice_root=None, port="54321+", verbose=True):
        """
        Start new H2O server on the local machine.

        :param jar_path: Path to the h2o.jar executable. If not given, then we will search for h2o.jar in the
            locations returned by `._jar_paths()`.
        :param nthreads: Number of threads in the thread pool. This should be related to the number of CPUs used.
            -1 means use all CPUs on the host. A positive integer (at most 4096) specifies the number of CPUs
            directly.
        :param enable_assertions: If True, pass `-ea` option to the JVM.
        :param max_mem_size: Maximum heap size (jvm option Xmx), in bytes. Must be at least ``1 << 25``.
        :param min_mem_size: Minimum heap size (jvm option Xms), in bytes. Must not exceed ``max_mem_size`` when
            both are given.
        :param ice_root: A directory where H2O stores its temporary files. Must be an existing directory. If not
            given, a fresh directory created by tempfile.mkdtemp() is used.
        :param port: Port where to start the new server. This could be either an integer, a string of digits
            (converted to an integer), or a string of the form "DDDDD+", indicating that the server should start
            looking for an open port starting from DDDDD and up. ``None`` is treated as "54321+".
        :param verbose: If True, then connection info will be printed to the stdout.

        :returns: a new H2OLocalServer instance
        :raises AssertionError: if any argument has an unexpected type or value.
        """
        assert jar_path is None or is_str(jar_path), "`jar_path` should be string, got %s" % type(jar_path)
        assert jar_path is None or jar_path.endswith("h2o.jar"), \
            "`jar_path` should be a path to an h2o.jar executable, got %s" % jar_path
        assert is_int(nthreads), "`nthreads` should be integer, got %s" % type(nthreads)
        assert nthreads == -1 or 1 <= nthreads <= 4096, "`nthreads` is out of bounds: %d" % nthreads
        assert isinstance(enable_assertions, bool), \
            "`enable_assertions` should be bool, got %s" % type(enable_assertions)
        assert max_mem_size is None or is_int(max_mem_size), \
            "`max_mem_size` should be integer, got %s" % type(max_mem_size)
        assert max_mem_size is None or max_mem_size >= 1 << 25, "`max_mem_size` too small: %d" % max_mem_size
        assert min_mem_size is None or is_int(min_mem_size), \
            "`min_mem_size` should be integer, got %s" % type(min_mem_size)
        assert min_mem_size is None or max_mem_size is None or min_mem_size <= max_mem_size, \
            "`min_mem_size`=%d is larger than the `max_mem_size`=%d" % (min_mem_size, max_mem_size)
        if ice_root:
            assert is_str(ice_root), "`ice_root` should be string, got %r" % type(ice_root)
            assert os.path.isdir(ice_root), "`ice_root` is not a valid directory: %s" % ice_root
        if port is None: port = "54321+"
        # `baseport` stays None unless the "DDDDD+" (search upwards) form was requested.
        baseport = None
        if is_str(port):
            if port.isdigit():
                port = int(port)
            else:
                # `endswith` (rather than `port[-1]`) keeps an empty string from raising IndexError, so that it
                # is reported through the same assertion message as any other malformed port.
                assert port.endswith("+") and port[:-1].isdigit(), \
                    "`port` should be of the form 'DDDD+', where D is a digit. Got: %s" % port
                baseport = int(port[:-1])
                port = 0
        assert is_int(port), "`port` should be integer (or string). Got: %s" % type(port)

        hs = H2OLocalServer()
        hs._verbose = bool(verbose)
        hs._jar_path = hs._find_jar(jar_path)
        hs._ice_root = ice_root
        if not ice_root:
            # No directory supplied: create a temporary one and remember it in `_tempdir` as well.
            hs._ice_root = tempfile.mkdtemp()
            hs._tempdir = hs._ice_root

        if verbose: print("Attempting to start a local H2O server...")
        hs._launch_server(port=port, baseport=baseport, nthreads=int(nthreads), ea=enable_assertions,
                          mmax=max_mem_size, mmin=min_mem_size)
        if verbose: print("Server is running at %s://%s:%d" % (hs.scheme, hs.ip, hs.port))
        # Make sure the server is shut down when the interpreter exits.
        atexit.register(lambda: hs.shutdown())
        return hs