Beispiel #1
0
    def start(jar_path=None, nthreads=-1, enable_assertions=True, max_mem_size=None, min_mem_size=None,
              ice_root=None, port="54321+", verbose=True):
        """
        Start new H2O server on the local machine.

        :param jar_path: Path to the h2o.jar executable. If not given, then we will search for h2o.jar in the
            locations returned by `._jar_paths()`.
        :param nthreads: Number of threads in the thread pool. This should be related to the number of CPUs used.
            -1 means use all CPUs on the host. A positive integer specifies the number of CPUs directly.
        :param enable_assertions: If True, pass `-ea` option to the JVM.
        :param max_mem_size: Maximum heap size (jvm option Xmx), in bytes.
        :param min_mem_size: Minimum heap size (jvm option Xms), in bytes.
        :param ice_root: A directory where H2O stores its temporary files. Default location is determined by
            tempfile.mkdtemp().
        :param port: Port where to start the new server. This could be either an integer, or a string of the form
            "DDDDD+", indicating that the server should start looking for an open port starting from DDDDD and up.
            ``None`` is treated the same as the default "54321+".
        :param verbose: If True, then connection info will be printed to the stdout.

        :returns: a new H2OLocalServer instance
        :raises H2OValueError: if `min_mem_size` exceeds `max_mem_size`, or if `port` is a string that is neither
            all digits nor digits followed by a single "+".
        """
        assert_is_type(jar_path, None, str)
        assert_is_type(port, None, int, str)
        assert_is_type(nthreads, -1, BoundInt(1, 4096))
        assert_is_type(enable_assertions, bool)
        assert_is_type(min_mem_size, None, int)
        assert_is_type(max_mem_size, None, BoundInt(1 << 25))
        assert_is_type(ice_root, None, I(str, os.path.isdir))
        if jar_path:
            assert_satisfies(jar_path, jar_path.endswith("h2o.jar"))

        if min_mem_size is not None and max_mem_size is not None and min_mem_size > max_mem_size:
            raise H2OValueError("`min_mem_size`=%d is larger than the `max_mem_size`=%d" % (min_mem_size, max_mem_size))
        if port is None: port = "54321+"
        baseport = None
        # TODO: get rid of this port gimmick and have 2 separate parameters.
        if is_type(port, str):
            if port.isdigit():
                # Plain numeric string: use exactly this port.
                port = int(port)
            else:
                # `endswith` is used instead of `port[-1]` so that an empty string is reported as an
                # H2OValueError rather than escaping as an IndexError.
                if not (port.endswith("+") and port[:-1].isdigit()):
                    raise H2OValueError("`port` should be of the form 'DDDD+', where D is a digit. Got: %s" % port)
                # "DDDD+" form: pass the number as `baseport` and a `port` of 0 to the launcher.
                baseport = int(port[:-1])
                port = 0

        hs = H2OLocalServer()
        hs._verbose = bool(verbose)
        hs._jar_path = hs._find_jar(jar_path)
        hs._ice_root = ice_root
        if not ice_root:
            # No directory supplied: create a fresh temporary one and record it in `_tempdir` as well.
            hs._ice_root = tempfile.mkdtemp()
            hs._tempdir = hs._ice_root

        if verbose: print("Attempting to start a local H2O server...")
        hs._launch_server(port=port, baseport=baseport, nthreads=int(nthreads), ea=enable_assertions,
                          mmax=max_mem_size, mmin=min_mem_size)
        if verbose: print("  Server is running at %s://%s:%d" % (hs.scheme, hs.ip, hs.port))
        # Ask the interpreter to call the server's shutdown() on exit.
        atexit.register(lambda: hs.shutdown())
        return hs
Beispiel #2
0
    def request(self, endpoint, data=None, json=None, filename=None):
        """
        Send a single REST API request to the backend H2O server and return the processed response.

        :param endpoint: (str) HTTP verb and path separated by a space, for example "GET /4/schemas/KeyV4".
        :param data: dictionary payload for the request. For GET requests a non-empty payload is sent as the
            query parameters instead of the request body.
        :param json: dictionary payload sent as a JSON body. Cannot be used together with `data`.
        :param filename: file to upload to the server (POST only). Cannot be used with `data` or `json`.

        :returns: whatever ``self._process_response`` produces for the server's response.
        :raises H2OConnectionError: if the connection is not initialized or was closed, if the server cannot be
            reached, or if the request timed out.
        :raises H2OResponseError: re-raised after the endpoint and payload were attached to its first argument.
        """
        if self._stage == 0:
            raise H2OConnectionError("Connection not initialized; run .connect() first.")
        if self._stage == -1:
            raise H2OConnectionError("Connection was closed, and can no longer be used.")

        # Split the endpoint into its HTTP verb and path, then build the full URL
        assert_is_type(endpoint, str)
        parsed = assert_matches(str(endpoint), r"^(GET|POST|PUT|DELETE|PATCH|HEAD) (/.*)$")
        method, urltail = parsed.group(1), parsed.group(2)
        url = self._base_url + urltail

        # Validate that the payload arguments are not mixed
        if filename is not None:
            assert_is_type(filename, str)
            assert_is_type(json, None, "Argument `json` should be None when `filename` is used.")
            assert_is_type(data, None, "Argument `data` should be None when `filename` is used.")
            assert_satisfies(method, method == "POST",
                             "File uploads can only be done via POST method, got %s" % method)
        elif data is not None:
            assert_is_type(data, dict)
            assert_is_type(json, None, "Argument `json` should be None when `data` is used.")
        elif json is not None:
            assert_is_type(json, dict)

        data = self._prepare_data_payload(data)
        files = self._prepare_file_payload(filename)
        if method == "GET" and data:
            # GET requests carry their payload in the query string
            params, data = data, None
        else:
            params = None

        # Fire the request and translate transport-level failures
        started_at = time.time()
        try:
            self._log_start_transaction(endpoint, data, json, files, params)
            user_agent = "H2O Python client/" + sys.version.replace("\n", "")
            response = requests.request(
                method=method, url=url, data=data, json=json, files=files, params=params,
                headers={"User-Agent": user_agent, "X-Cluster": self._cluster_name},
                timeout=self._timeout, auth=self._auth, verify=self._verify_ssl_cert, proxies=self._proxies)
            self._log_end_transaction(started_at, response)
            return self._process_response(response)

        except (requests.exceptions.ConnectionError, requests.exceptions.HTTPError) as exc:
            server_is_dead = self._local_server and not self._local_server.is_running()
            if server_is_dead:
                self._log_end_exception("Local server has died.")
                raise H2OConnectionError("Local server has died unexpectedly. RIP.")
            self._log_end_exception(exc)
            raise H2OConnectionError("Unexpected HTTP error: %s" % exc)
        except requests.exceptions.Timeout as exc:
            self._log_end_exception(exc)
            raise H2OConnectionError("Timeout after %.3fs" % (time.time() - started_at))
        except H2OResponseError as exc:
            # Attach the request details to the error object before passing it on
            details = exc.args[0]
            details.endpoint = endpoint
            details.payload = (data, json, files, params)
            raise
Beispiel #3
0
    def request(self,
                endpoint,
                data=None,
                json=None,
                filename=None,
                save_to=None):
        """
        Send a single REST API request to the backend H2O server and return the processed response.

        :param endpoint: (str) HTTP verb and path separated by a space, for example "GET /4/schemas/KeyV4".
        :param data: dictionary payload for the request. For GET and DELETE requests a non-empty payload is sent
            as the query parameters instead of the request body.
        :param json: dictionary payload sent as a JSON body. Cannot be used together with `data`.
        :param filename: file to upload to the server (POST only). Cannot be used with `data` or `json`.
        :param save_to: either a string or a function. When given, the response is requested in streaming mode;
            a function is called with the raw response and its result is used in place of ``save_to``. The value
            is then handed to ``self._process_response`` together with the response.

        :returns: whatever ``self._process_response`` produces for the server's response.
        :raises H2OConnectionError: if the connection is not initialized or was closed, if the server cannot be
            reached, or if the request timed out.
        :raises H2OResponseError: re-raised; if its first argument is an H2OErrorV3, the endpoint and payload are
            attached to it first.
        """
        if self._stage == 0:
            raise H2OConnectionError("Connection not initialized; run .connect() first.")
        if self._stage == -1:
            raise H2OConnectionError("Connection was closed, and can no longer be used.")

        # Split the endpoint into its HTTP verb and path, then build the full URL
        assert_is_type(endpoint, str)
        parsed = assert_matches(str(endpoint), r"^(GET|POST|PUT|DELETE|PATCH|HEAD|TRACE) (/.*)$")
        method, urltail = parsed.group(1), parsed.group(2)
        url = self._base_url + urltail

        # Validate that the payload arguments are not mixed
        if filename is not None:
            assert_is_type(filename, str)
            assert_is_type(json, None, "Argument `json` should be None when `filename` is used.")
            assert_is_type(data, None, "Argument `data` should be None when `filename` is used.")
            assert_satisfies(method, method == "POST",
                             "File uploads can only be done via POST method, got %s" % method)
        elif data is not None:
            assert_is_type(data, dict)
            assert_is_type(json, None, "Argument `json` should be None when `data` is used.")
        elif json is not None:
            assert_is_type(json, dict)

        # The request body is either the prepared data or the prepared file, never both
        if filename is None:
            request_data = self._prepare_data_payload(data)
        else:
            request_data = self._prepare_file_payload(filename)

        params = None
        if method in ("GET", "DELETE") and data:
            # These verbs carry their payload in the query string
            params, request_data = request_data, None

        stream = save_to is not None
        if stream:
            assert_is_type(save_to, str, types.FunctionType)

        # A list of cookies is collapsed into a single ";"-separated string (stored back on the connection)
        if isinstance(self._cookies, list):
            self._cookies = ";".join(self._cookies)

        # Fire the request and translate transport-level failures
        started_at = time.time()
        try:
            self._log_start_transaction(endpoint, request_data, json, filename, params)
            extra_kwargs = self._request_args()
            response = requests.request(method=method, url=url, data=request_data, json=json,
                                        params=params, stream=stream, **extra_kwargs)
            if isinstance(save_to, types.FunctionType):
                save_to = save_to(response)
            self._log_end_transaction(started_at, response)
            return self._process_response(response, save_to)

        except (requests.exceptions.ConnectionError, requests.exceptions.HTTPError) as exc:
            server_is_dead = self._local_server and not self._local_server.is_running()
            if server_is_dead:
                self._log_end_exception("Local server has died.")
                raise H2OConnectionError("Local server has died unexpectedly. RIP.")
            self._log_end_exception(exc)
            raise H2OConnectionError("Unexpected HTTP error: %s" % exc)
        except requests.exceptions.Timeout as exc:
            self._log_end_exception(exc)
            raise H2OConnectionError("Timeout after %.3fs" % (time.time() - started_at))
        except H2OResponseError as exc:
            # Attach the request details to structured errors before passing them on
            details = exc.args[0]
            if isinstance(details, H2OErrorV3):
                details.endpoint = endpoint
                details.payload = (request_data, json, filename, params)
            raise
Beispiel #4
0
    def open(server=None,
             url=None,
             ip=None,
             port=None,
             name=None,
             https=None,
             auth=None,
             verify_ssl_certificates=True,
             cacert=None,
             proxy=None,
             cookies=None,
             verbose=True,
             msgs=None,
             strict_version_check=True):
        r"""
        Connect to an already running H2O server and return the resulting connection object.

        No persistent connection is held: the method builds an :class:`H2OConnection`, tests that the server
        answers (with retries), registers an at-exit hook that closes the connection, and finally checks the
        server version.

        The target is chosen in one of three mutually exclusive ways:

            * a ``server`` instance,
            * a full ``url``,
            * the triple ``ip``, ``port``, ``https``.

        :param H2OLocalServer server: local server instance to connect to; its ip, port and scheme are used.
            Must be running. ``ip``, ``url`` and ``name`` have to be None in this case.
        :param url: full url of the server; must match ``H2OConnection.url_pattern``. ``ip`` has to be None.
        :param ip: target server's IP address or hostname (default "localhost").
        :param port: H2O server's port (default 54321); a string of digits is also accepted.
        :param name: H2O cluster name.
        :param https: if True then connect using https instead of http (default False).
        :param auth: either a (username, password) tuple, or an authenticator (AuthBase) object.
        :param verify_ssl_certificates: if False then SSL certificate checking will be disabled (default True).
        :param cacert: path to a CA bundle file or a directory with certificates of trusted CAs (optional).
        :param proxy: url address of a proxy server. When omitted, a warning is issued for a matching ``*_proxy``
            environment variable; pass ``proxy="(default)"`` to suppress that warning.
        :param cookies: cookie (or list of cookies) to add to requests.
        :param verbose: if True, then connection progress info will be printed to the stdout.
        :param msgs: custom messages to display during connection, as a tuple of three strings.
        :param strict_version_check: passed as ``strict`` to the cluster's ``check_version``.

        :returns: A new :class:`H2OConnection` instance.
        :raises H2OConnectionError: if the given ``server`` is not running (other failures come from the
            connection test).
        """
        if server is not None:
            # Target given as a local server object
            assert_is_type(server, H2OLocalServer)
            assert_is_type(ip, None, "`ip` should be None when `server` parameter is supplied")
            assert_is_type(url, None, "`url` should be None when `server` parameter is supplied")
            assert_is_type(name, None, "`name` should be None when `server` parameter is supplied")
            if not server.is_running():
                raise H2OConnectionError("Unable to connect to server because it is not running")
            ip, port, scheme = server.ip, server.port, server.scheme
            context_path = ''
        elif url is not None:
            # Target given as a full url
            assert_is_type(url, str)
            assert_is_type(ip, None, "`ip` should be None when `url` parameter is supplied")
            assert_is_type(name, str, None)
            # Unicode characters in the URL are not allowed (for now).
            url_match = assert_matches(url, H2OConnection.url_pattern)
            scheme = url_match.group(1)
            ip = url_match.group(2)
            port = int(url_match.group(3))
            path_part = url_match.group(4)
            context_path = '' if path_part is None else "%s" % path_part
        else:
            # Target given as ip / port / https, each with a default
            if ip is None:
                ip = str("localhost")
            if port is None:
                port = 54321
            if https is None:
                https = False
            if is_type(port, str) and port.isdigit():
                port = int(port)
            assert_is_type(ip, str)
            assert_is_type(port, int)
            assert_is_type(name, str, None)
            assert_is_type(https, bool)
            assert_matches(ip, r"(?:[\w-]+\.)*[\w-]+")
            assert_satisfies(port, 1 <= port <= 65535)
            scheme = "https" if https else "http"
            context_path = ''

        if verify_ssl_certificates is None:
            verify_ssl_certificates = True
        assert_is_type(verify_ssl_certificates, bool)
        assert_is_type(cacert, str, None)
        assert_is_type(proxy, str, None)
        assert_is_type(auth, AuthBase, (str, str), None)
        assert_is_type(cookies, str, [str], None)
        assert_is_type(msgs, None, (str, str, str))

        conn = H2OConnection()
        conn._verbose = bool(verbose)
        conn._local_server = server
        conn._base_url = "%s://%s:%d%s" % (scheme, ip, port, context_path)
        conn._name = server.name if server else name
        conn._verify_ssl_cert = bool(verify_ssl_certificates)
        conn._cacert = cacert
        conn._auth = auth
        conn._cookies = cookies
        conn._proxies = None
        if proxy and proxy != "(default)":
            conn._proxies = {scheme: proxy}
        elif not proxy:
            # Warn about a "<scheme>_proxy" variable in the environment. [PUBDEV-2504]
            # Passing proxy = "(default)" skips this branch and thereby the warning.
            proxy_var = scheme + "_proxy"
            for env_var in os.environ:
                if env_var.lower() == proxy_var:
                    warn("Proxy is defined in the environment: %s. "
                         "This may interfere with your H2O Connection." % env_var)

            if any(local_host in conn.ip() for local_host in ("localhost", "127.0.0.1")):
                # An empty mapping would make the requests library fall back to its default behaviour,
                # so explicit "no proxy" entries are used for local targets instead.
                conn._proxies = {"http": None, "https": None}

        try:
            retries = 20 if server else 5
            conn._stage = 1
            conn._timeout = 3.0
            conn._cluster = conn._test_connection(retries, messages=msgs)
            # A server that cannot respond within 1s should be considered buggy. The timeout is nevertheless
            # switched off afterwards, for no good reason other than to ignore all those bugs :(
            conn._timeout = None

            # Only a weak reference is captured by the at-exit hook, so that the hook does not keep
            # the connection alive after a manual close.
            weak_conn = ref(conn)

            def exit_close():
                live_conn = weak_conn()
                if live_conn and live_conn.connected:
                    print("Closing connection %s at exit" % live_conn.session_id)
                    live_conn.close()

            atexit.register(exit_close)
        except Exception:
            # Put the stage back to 0 so that the connection is known not to be properly initialized.
            conn._stage = 0
            raise

        conn._cluster.check_version(strict=strict_version_check)
        return conn
Beispiel #5
0
    def start(jar_path=None,
              nthreads=-1,
              enable_assertions=True,
              max_mem_size=None,
              min_mem_size=None,
              ice_root=None,
              log_dir=None,
              log_level=None,
              max_log_file_size=None,
              port="54321+",
              name=None,
              extra_classpath=None,
              verbose=True,
              jvm_custom_args=None,
              bind_to_localhost=True):
        """
        Start new H2O server on the local machine.

        :param jar_path: Path to the h2o.jar executable. If not given, then we will search for h2o.jar in the
            locations returned by `._jar_paths()`.
        :param nthreads: Number of threads in the thread pool. This should be related to the number of CPUs used.
            -1 means use all CPUs on the host. A positive integer specifies the number of CPUs directly.
        :param enable_assertions: If True, pass `-ea` option to the JVM.
        :param max_mem_size: Maximum heap size (jvm option Xmx), in bytes.
        :param min_mem_size: Minimum heap size (jvm option Xms), in bytes.
        :param log_dir: Directory for H2O logs to be stored if a new instance is started. Default directory is determined
            by H2O internally.
        :param log_level: The logger level for H2O if a new instance is started. One of "TRACE", "DEBUG", "INFO",
            "WARN", "ERRR", "FATA", or None.
        :param max_log_file_size: Maximum size of INFO and DEBUG log files. The file is rolled over after a specified
            size has been reached. (The default is 3MB. Minimum is 1MB and maximum is 99999MB)
        :param ice_root: A directory where H2O stores its temporary files. Default location is determined by
            tempfile.mkdtemp().
        :param port: Port where to start the new server. This could be either an integer, or a string of the form
            "DDDDD+", indicating that the server should start looking for an open port starting from DDDDD and up.
            ``None`` is treated the same as the default "54321+".
        :param name: name of the h2o cluster to be started
        :param extra_classpath: List of paths to libraries that should be included on the Java classpath.
        :param verbose: If True, then connection info will be printed to the stdout.
        :param jvm_custom_args: Custom, user-defined arguments for the JVM H2O is instantiated in
        :param bind_to_localhost: A flag indicating whether access to the H2O instance should be restricted to the local
            machine (default) or if it can be reached from other computers on the network.
            Only applicable when H2O is started from the Python client.

        :returns: a new H2OLocalServer instance
        :raises H2OValueError: if `min_mem_size` exceeds `max_mem_size`, or if `port` is a string that is neither
            all digits nor digits followed by a single "+".
        """
        assert_is_type(jar_path, None, str)
        assert_is_type(port, None, int, str)
        assert_is_type(name, None, str)
        assert_is_type(nthreads, -1, BoundInt(1, 4096))
        assert_is_type(enable_assertions, bool)
        assert_is_type(min_mem_size, None, int)
        assert_is_type(max_mem_size, None, BoundInt(1 << 25))
        assert_is_type(log_dir, str, None)
        assert_is_type(log_level, str, None)
        assert_satisfies(
            log_level, log_level
            in [None, "TRACE", "DEBUG", "INFO", "WARN", "ERRR", "FATA"])
        assert_is_type(max_log_file_size, str, None)
        assert_is_type(ice_root, None, I(str, os.path.isdir))
        assert_is_type(extra_classpath, None, [str])
        assert_is_type(jvm_custom_args, list, None)
        assert_is_type(bind_to_localhost, bool)
        if jar_path:
            assert_satisfies(jar_path, jar_path.endswith("h2o.jar"))

        if min_mem_size is not None and max_mem_size is not None and min_mem_size > max_mem_size:
            raise H2OValueError(
                "`min_mem_size`=%d is larger than the `max_mem_size`=%d" %
                (min_mem_size, max_mem_size))
        if port is None: port = "54321+"
        baseport = None
        # TODO: get rid of this port gimmick and have 2 separate parameters.
        if is_type(port, str):
            if port.isdigit():
                # Plain numeric string: use exactly this port.
                port = int(port)
            else:
                # `endswith` is used instead of `port[-1]` so that an empty string is reported as an
                # H2OValueError rather than escaping as an IndexError.
                if not (port.endswith("+") and port[:-1].isdigit()):
                    raise H2OValueError(
                        "`port` should be of the form 'DDDD+', where D is a digit. Got: %s"
                        % port)
                # "DDDD+" form: pass the number as `baseport` and a `port` of 0 to the launcher.
                baseport = int(port[:-1])
                port = 0

        hs = H2OLocalServer()
        hs._verbose = bool(verbose)
        hs._jar_path = hs._find_jar(jar_path)
        hs._extra_classpath = extra_classpath
        hs._ice_root = ice_root
        hs._name = name
        if not ice_root:
            # No directory supplied: create a fresh temporary one and record it in `_tempdir` as well.
            hs._ice_root = tempfile.mkdtemp()
            hs._tempdir = hs._ice_root

        if verbose: print("Attempting to start a local H2O server...")
        hs._launch_server(port=port,
                          baseport=baseport,
                          nthreads=int(nthreads),
                          ea=enable_assertions,
                          mmax=max_mem_size,
                          mmin=min_mem_size,
                          jvm_custom_args=jvm_custom_args,
                          bind_to_localhost=bind_to_localhost,
                          log_dir=log_dir,
                          log_level=log_level,
                          max_log_file_size=max_log_file_size)
        if verbose:
            print("  Server is running at %s://%s:%d" %
                  (hs.scheme, hs.ip, hs.port))
        # Ask the interpreter to call the server's shutdown() on exit.
        atexit.register(lambda: hs.shutdown())
        return hs
Beispiel #6
0
    def open(server=None,
             url=None,
             ip=None,
             port=None,
             https=None,
             auth=None,
             verify_ssl_certificates=True,
             proxy=None,
             cluster_name=None,
             verbose=True):
        r"""
        Establish connection to an existing H2O server.

        The connection is not kept alive, so what this method actually does is it attempts to connect to the
        specified server, and checks that the server is healthy and responds to REST API requests. If the H2O server
        cannot be reached, an :class:`H2OConnectionError` will be raised. On success this method returns a new
        :class:`H2OConnection` object, and it is the only "official" way to create instances of this class.

        There are 3 ways to specify the target to connect to (these settings are mutually exclusive):

            * pass a ``server`` option,
            * pass the full ``url`` for the connection,
            * provide a triple of parameters ``ip``, ``port``, ``https``.

        :param H2OLocalServer server: connect to the specified local server instance. There is a slight difference
            between connecting to a local server by specifying its ip and address, and connecting through
            an H2OLocalServer instance: if the server becomes unresponsive, then having access to its process handle
            will allow us to query the server status through OS, and potentially provide snapshot of the server's
            error log in the exception information.
        :param url: full url of the server to connect to.
        :param ip: target server's IP address or hostname (default "localhost").
        :param port: H2O server's port (default 54321).
        :param https: if True then connect using https instead of http (default False).
        :param verify_ssl_certificates: if False then SSL certificate checking will be disabled (default True). This
            setting should rarely be disabled, as it makes your connection vulnerable to man-in-the-middle attacks. When
            used, it will generate a warning from the requests library. Has no effect when ``https`` is False.
        :param auth: authentication token for connecting to the remote server. This can be either a
            (username, password) tuple, or an authenticator (AuthBase) object. Please refer to the documentation in
            the ``requests.auth`` module.
        :param proxy: url address of a proxy server. If you do not specify the proxy, then the requests module
            will attempt to use a proxy specified in the environment (in HTTP_PROXY / HTTPS_PROXY variables). We
            check for the presence of these variables and issue a warning if they are found. In order to suppress
            that warning and use proxy from the environment, pass ``proxy="(default)"``.
        :param cluster_name: name of the H2O cluster to connect to. This option is used from Steam only.
        :param verbose: if True, then connection progress info will be printed to the stdout.

        :returns: A new :class:`H2OConnection` instance.
        :raises H2OConnectionError: if the server cannot be reached.
        :raises H2OServerError: if the server is in an unhealthy state (although this might be a recoverable error, the
            client itself should decide whether it wants to retry or not).
        """
        if server is not None:
            assert_is_type(server, H2OLocalServer)
            assert_is_type(
                ip, None,
                "`ip` should be None when `server` parameter is supplied")
            # The message below used to name `ip` although it is `url` that is being checked.
            assert_is_type(
                url, None,
                "`url` should be None when `server` parameter is supplied")
            if not server.is_running():
                raise H2OConnectionError(
                    "Unable to connect to server because it is not running")
            ip = server.ip
            port = server.port
            scheme = server.scheme
        elif url is not None:
            assert_is_type(url, str)
            assert_is_type(
                ip, None,
                "`ip` should be None when `url` parameter is supplied")
            # We don't allow any Unicode characters in the URL. Maybe some day we will...
            match = assert_matches(
                url, r"^(https?)://((?:[\w-]+\.)*[\w-]+):(\d+)/?$")
            scheme = match.group(1)
            ip = match.group(2)
            port = int(match.group(3))
        else:
            # Fill in the defaults for the ip / port / https triple, then validate it.
            if ip is None: ip = str("localhost")
            if port is None: port = 54321
            if https is None: https = False
            if is_str(port) and port.isdigit(): port = int(port)
            assert_is_type(ip, str)
            assert_is_type(port, int)
            assert_is_type(https, bool)
            assert_matches(ip, r"(?:[\w-]+\.)*[\w-]+")
            assert_satisfies(port, 1 <= port <= 65535)
            scheme = "https" if https else "http"

        if verify_ssl_certificates is None: verify_ssl_certificates = True
        assert_is_type(verify_ssl_certificates, bool)
        assert_is_type(proxy, str, None)
        assert_is_type(auth, AuthBase, (str, str), None)
        assert_is_type(cluster_name, str, None)

        conn = H2OConnection()
        conn._verbose = bool(verbose)
        conn._local_server = server
        conn._base_url = "%s://%s:%d" % (scheme, ip, port)
        conn._verify_ssl_cert = bool(verify_ssl_certificates)
        conn._auth = auth
        conn._cluster_name = cluster_name
        conn._proxies = None
        if proxy and proxy != "(default)":
            conn._proxies = {scheme: proxy}
        elif not proxy:
            # Give user a warning if there are any "*_proxy" variables in the environment. [PUBDEV-2504]
            # To suppress the warning pass proxy = "(default)".
            for name in os.environ:
                if name.lower() == scheme + "_proxy":
                    warn("Proxy is defined in the environment: %s. "
                         "This may interfere with your H2O Connection." %
                         os.environ[name])

        try:
            # Move the connection to stage 1 before the connection test is run.
            # More retries are allowed when connecting to a local server object.
            retries = 20 if server else 5
            conn._stage = 1
            conn._timeout = 3.0
            conn._cluster_info = conn._test_connection(retries)
            # If a server is unable to respond within 1s, it should be considered a bug. However we disable this
            # setting for now, for no good reason other than to ignore all those bugs :(
            conn._timeout = None
            atexit.register(lambda: conn.close())
        except BaseException:
            # Deliberately catches everything (same as a bare `except:`): the stage is put back to 0 so that
            # we know the connection was not initialized properly, and the error is re-raised untouched.
            conn._stage = 0
            raise
        return conn
Beispiel #7
0
    def request(self, endpoint, data=None, json=None, filename=None):
        """
        Send a REST API request to the backend H2O server and return the processed response.

        :param endpoint: (str) HTTP method followed by the URL path, for example "GET /4/schemas/KeyV4"
        :param data: payload for POST (and sometimes GET) requests: a dictionary of simple key/value pairs
            (values can also be arrays), which will be sent over in x-www-form-encoded format. For a GET
            request a non-empty payload is passed as the query parameters instead of the request body.
        :param json: alternative payload (a dictionary) sent as a JSON body. Cannot be used together with `data`.
        :param filename: file to upload to the server (POST only). Cannot be used with `data` or `json`.

        :returns: an H2OResponse object representing the server's response
        :raises H2OConnectionError: if the H2O server cannot be reached (or connection is not initialized)
        :raises H2OServerError: if there was a server error (http 500), or server returned malformed JSON
        :raises H2OResponseError: if the server returned an H2OErrorV3 response (e.g. if the parameters were invalid)
        """
        # Stage 0 means the connection was never opened, stage -1 that it has already been closed.
        if self._stage == 0:
            raise H2OConnectionError("Connection not initialized; run .connect() first.")
        if self._stage == -1:
            raise H2OConnectionError("Connection was closed, and can no longer be used.")

        # Split the endpoint into the HTTP method and the path, then build the full URL
        assert_is_type(endpoint, str)
        match = assert_matches(str(endpoint), r"^(GET|POST|PUT|DELETE|PATCH|HEAD) (/.*)$")
        method = match.group(1)
        url = self._base_url + match.group(2)

        # At most one of `filename`, `data` and `json` may be given.
        # NOTE(review): the explanatory strings below are passed positionally to assert_is_type(), so it looks
        # like they are treated as one more accepted literal value rather than as an error message -- confirm
        # against the signature of assert_is_type().
        if filename is not None:
            assert_is_type(filename, str)
            assert_is_type(json, None, "Argument `json` should be None when `filename` is used.")
            assert_is_type(data, None, "Argument `data` should be None when `filename` is used.")
            assert_satisfies(method, method == "POST",
                             "File uploads can only be done via POST method, got %s" % method)
        elif data is not None:
            assert_is_type(data, dict)
            assert_is_type(json, None, "Argument `json` should be None when `data` is used.")
        elif json is not None:
            assert_is_type(json, dict)

        data = self._prepare_data_payload(data)
        files = self._prepare_file_payload(filename)
        # A GET request carries its payload in the query string, not in the body
        if method == "GET" and data:
            params, data = data, None
        else:
            params = None

        # Perform the request, translating transport-level failures into H2OConnectionError
        start_time = time.time()
        try:
            self._log_start_transaction(endpoint, data, json, files, params)
            user_agent = "H2O Python client/" + sys.version.replace("\n", "")
            response = requests.request(
                method=method, url=url, data=data, json=json, files=files, params=params,
                headers={"User-Agent": user_agent, "X-Cluster": self._cluster_name},
                timeout=self._timeout, auth=self._auth, verify=self._verify_ssl_cert, proxies=self._proxies)
            self._log_end_transaction(start_time, response)
            return self._process_response(response)

        except (requests.exceptions.ConnectionError, requests.exceptions.HTTPError) as exc:
            # Tell a dead local server apart from any other connection problem
            if self._local_server and not self._local_server.is_running():
                self._log_end_exception("Local server has died.")
                raise H2OConnectionError("Local server has died unexpectedly. RIP.")
            self._log_end_exception(exc)
            raise H2OConnectionError("Unexpected HTTP error: %s" % exc)
        except requests.exceptions.Timeout as exc:
            self._log_end_exception(exc)
            raise H2OConnectionError("Timeout after %.3fs" % (time.time() - start_time))
        except H2OResponseError as exc:
            # Attach the request details to the error object before letting the exception propagate
            details = exc.args[0]
            details.endpoint = endpoint
            details.payload = (data, json, files, params)
            raise
Beispiel #8
0
    def confusion_matrix(self, metrics=None, thresholds=None):
        """
        Get the confusion matrix for the specified metrics and/or thresholds.

        :param metrics: A string (or list of strings) in {"min_per_class_accuracy", "absolute_mcc", "precision",
            "recall", "specificity", "accuracy", "f0point5", "f2", "f1", "mean_per_class_accuracy"}. Defaults to
            "f1" when neither ``metrics`` nor ``thresholds`` is given.
        :param thresholds: A value (or list of values) between 0 and 1. A list passed in is left unmodified.
        :return: a list of ConfusionMatrix objects (if there are more than one to return), or a single ConfusionMatrix
            (if there is only one)
        :raises ValueError: if any of the requested metrics is not one of the allowed names.
        """
        allowed_metrics = ("min_per_class_accuracy", "absolute_mcc", "precision", "recall", "specificity",
                           "accuracy", "f0point5", "f2", "f1", "mean_per_class_accuracy")

        # make lists out of metrics and thresholds arguments
        if metrics is None and thresholds is None: metrics = ["f1"]

        if isinstance(metrics, list):
            metrics_list = metrics
        elif metrics is None:
            metrics_list = []
        else:
            metrics_list = [metrics]

        if isinstance(thresholds, list):
            # Work on a copy: the metric thresholds appended below must not end up in the caller's list.
            thresholds_list = list(thresholds)
        elif thresholds is None:
            thresholds_list = []
        else:
            thresholds_list = [thresholds]

        # error check the metrics_list and thresholds_list
        assert_is_type(thresholds_list, [numeric])
        assert_satisfies(thresholds_list, all(0 <= t <= 1 for t in thresholds_list))

        if not all(m.lower() in allowed_metrics for m in metrics_list):
            # The message is built from the same tuple the check uses, so the two cannot drift apart.
            raise ValueError("The only allowable metrics are " + ", ".join(allowed_metrics))

        # make one big list that combines the thresholds and metric-thresholds: the explicitly requested
        # thresholds come first, followed by one threshold per requested metric (in the order of metrics_list)
        metrics_thresholds = [self.find_threshold_by_max_metric(m) for m in metrics_list]
        first_metrics_thresholds_offset = len(thresholds_list)
        thresholds_list.extend(metrics_thresholds)

        thresh2d = self._metric_json['thresholds_and_metric_scores']
        actual_thresholds = [float(e[0]) for e in thresh2d.cell_values]
        cms = []
        for i, t in enumerate(thresholds_list):
            idx = self.find_idx_by_threshold(t)
            row = thresh2d.cell_values[idx]
            # Columns 11-14 of the row are read as the tns / fns / fps / tps counts
            tns = row[11]
            fns = row[12]
            fps = row[13]
            tps = row[14]
            p = tps + fns
            n = tns + fps
            c0 = n - fps
            c1 = p - tps
            # Decide by position, not by value: a user-supplied threshold that happens to equal a metric's
            # threshold must not be labelled as that metric, and two metrics sharing the same threshold must
            # each get their own label.
            if i >= first_metrics_thresholds_offset:
                m = metrics_list[i - first_metrics_thresholds_offset]
                table_header = "Confusion Matrix (Act/Pred) for max " + m + " @ threshold = " + str(
                    actual_thresholds[idx])
            else:
                table_header = "Confusion Matrix (Act/Pred) @ threshold = " + str(actual_thresholds[idx])
            cms.append(ConfusionMatrix(cm=[[c0, fps], [c1, tps]], domains=self._metric_json['domain'],
                                       table_header=table_header))

        if len(cms) == 1:
            return cms[0]
        else:
            return cms
Beispiel #9
0
def test_asserts():
    """
    Test type-checking functionality.

    Exercises assert_is_type() with values that must be accepted and values that must be rejected, then
    checks assert_matches() and assert_satisfies().
    """
    def assert_error(*args, **kwargs):
        """
        Check that assert_is_type() with given arguments throws an error.

        Returns normally when H2OTypeError is raised; raises RuntimeError when no exception is thrown.
        """
        try:
            assert_is_type(*args, **kwargs)
            raise RuntimeError("Failed to throw an exception")
        except H2OTypeError as e:
            # Check whether the message can stringify properly
            message = str(e)
            assert len(message) < 1000
            return

    # A small diamond-shaped class hierarchy used by the class-based checks below
    class A(object):
        pass

    class B(A):
        pass

    class C(A):
        pass

    class D(B, C):
        pass

    # --- Values that must be accepted ---
    assert_is_type(3, int)
    assert_is_type(2**100, int)
    assert_is_type("3", str)
    assert_is_type(u"3", str)
    assert_is_type("foo", u"foo")
    assert_is_type(u"foo", "foo")
    assert_is_type("I", *list("ABCDEFGHIJKL"))
    assert_is_type(False, bool)
    assert_is_type(43, str, bool, int)
    assert_is_type(4 / 3, int, float)
    assert_is_type(None, None)
    assert_is_type(None, A, str, None)
    assert_is_type([], [float])
    assert_is_type([1, 4, 5], [int])
    assert_is_type([1.0, 2, 5], [int, float])
    assert_is_type([[2.0, 3.1, 0], [2, 4.4, 1.1], [-1, 0]], [[int, float]])
    assert_is_type([1, None, 2], [int, float, None])
    assert_is_type({1, 5, 1, 1, 3}, {int})
    assert_is_type({1, "hello", 3}, {int, str})
    assert_is_type({"foo": 1, "bar": 2}, {str: int})
    assert_is_type({
        "foo": 3,
        "bar": [5],
        "baz": None
    }, {str: U(int, None, [int])})
    assert_is_type({
        "foo": 1,
        "bar": 2
    }, {
        "foo": int,
        "bar": U(int, float, None),
        "baz": bool
    })
    assert_is_type((1, 3), (int, int))
    assert_is_type(("a", "b", "c"), (int, int, int), (str, str, str))
    assert_is_type([1, [2], [{3}]], [int, [int], [{3}]])
    assert_is_type(A(), None, A)
    assert_is_type(B(), None, A)
    assert_is_type(C(), A, B)
    assert_is_type(D(), I(A, B, C))
    assert_is_type(A, type)
    for a in range(-2, 5):
        assert_is_type(a, -2, -1, 0, 1, 2, 3, 4)
    assert_is_type(1, numeric)
    assert_is_type(2.2, numeric)
    assert_is_type(1, I(numeric, object))

    # --- Values that must be rejected with an H2OTypeError ---
    assert_error(3, str)
    assert_error("Z", *list("ABCDEFGHIJKL"))
    assert_error(u"Z", "a", "...", "z")
    assert_error("X", u"x")
    assert_error(0, bool)
    assert_error(0, float, str, bool, None)
    assert_error([1, 5], [float])
    assert_error((1, 3), (int, str), (str, int), (float, float))
    assert_error(A(), None, B)
    assert_error(A, A)
    assert_error({
        "foo": 1,
        "bar": "2"
    }, {
        "foo": int,
        "bar": U(int, float, None)
    })
    assert_error(3, 0, 2, 4)
    assert_error(None, numeric)
    assert_error("sss", numeric)
    assert_error(B(), I(A, B, C))
    assert_error(2, I(int, str))

    # --- assert_matches(): returns a match object whose groups can be inspected ---
    url_regex = r"^(https?)://((?:[\w-]+\.)*[\w-]+):(\d+)/?$"
    assert_matches("Hello, world!", r"^(\w+), (\w*)!$")
    assert_matches("http://127.0.0.1:3233/", url_regex)
    m = assert_matches("https://localhost:54321", url_regex)
    assert m.group(1) == "https"
    assert m.group(2) == "localhost"
    assert m.group(3) == "54321"

    # --- assert_satisfies() ---
    x = 5
    assert_satisfies(x, x < 1000)
    assert_satisfies(x, x**x > 1000)
    assert_satisfies(url_regex, url_regex.lower() == url_regex)
    try:
        assert_satisfies(url_regex, url_regex.upper() == url_regex)
    except H2OValueError as e:
        # The error message is expected to quote the text of the condition that failed
        assert "url_regex.upper() == url_regex" in str(e), "Error message is bad: " + str(e)
    else:
        # Without this branch the test would pass silently if the false condition were accepted
        raise RuntimeError("Failed to throw an exception")
Beispiel #10
0
    def confusion_matrix(self, metrics=None, thresholds=None):
        """
        Get the confusion matrix for the specified metrics and/or thresholds.

        :param metrics: A string (or list of strings) among metrics listed in :const:`max_metrics`. Defaults to 'f1'
            when neither ``metrics`` nor ``thresholds`` is given.
        :param thresholds: A value (or list of values) between 0 and 1. A list passed in is left unmodified.
        :returns: a list of ConfusionMatrix objects (if there are more than one to return), or a single ConfusionMatrix
            (if there is only one).
        :raises ValueError: if any of the requested metrics is not listed in :const:`max_metrics`.
        """
        # make lists out of metrics and thresholds arguments
        if metrics is None and thresholds is None:
            metrics = ['f1']

        if isinstance(metrics, list):
            metrics_list = metrics
        elif metrics is None:
            metrics_list = []
        else:
            metrics_list = [metrics]

        if isinstance(thresholds, list):
            # Work on a copy: the metric thresholds appended below must not end up in the caller's list.
            thresholds_list = list(thresholds)
        elif thresholds is None:
            thresholds_list = []
        else:
            thresholds_list = [thresholds]

        # error check the metrics_list and thresholds_list
        assert_is_type(thresholds_list, [numeric])
        assert_satisfies(thresholds_list, all(0 <= t <= 1 for t in thresholds_list))

        if not all(m.lower() in H2OBinomialModelMetrics.max_metrics for m in metrics_list):
            # Format the message explicitly; passing the list as a second argument would leave "{}" unfilled.
            raise ValueError("The only allowable metrics are {}"
                             .format(', '.join(H2OBinomialModelMetrics.max_metrics)))

        # make one big list that combines the thresholds and metric-thresholds: the explicitly requested
        # thresholds come first, followed by one threshold per requested metric (in the order of metrics_list)
        metrics_thresholds = [self.find_threshold_by_max_metric(m) for m in metrics_list]
        first_metrics_thresholds_offset = len(thresholds_list)
        thresholds_list.extend(metrics_thresholds)

        thresh2d = self._metric_json['thresholds_and_metric_scores']
        actual_thresholds = [float(e[0]) for e in thresh2d.cell_values]
        cms = []
        for i, t in enumerate(thresholds_list):
            idx = self.find_idx_by_threshold(t)
            row = thresh2d.cell_values[idx]
            # Columns 11-14 of the row are read as the tns / fns / fps / tps counts
            tns = row[11]
            fns = row[12]
            fps = row[13]
            tps = row[14]
            p = tps + fns
            n = tns + fps
            c0 = n - fps
            c1 = p - tps
            # Decide by position, not by value: for a user-supplied threshold that happens to equal a metric's
            # threshold, `i - first_metrics_thresholds_offset` would be negative and pick an unrelated metric.
            if i >= first_metrics_thresholds_offset:
                m = metrics_list[i - first_metrics_thresholds_offset]
                table_header = "Confusion Matrix (Act/Pred) for max {} @ threshold = {}".format(m, actual_thresholds[idx])
            else:
                table_header = "Confusion Matrix (Act/Pred) @ threshold = {}".format(actual_thresholds[idx])
            cms.append(ConfusionMatrix(cm=[[c0, fps], [c1, tps]], domains=self._metric_json['domain'],
                                       table_header=table_header))

        if len(cms) == 1:
            return cms[0]
        else:
            return cms
Beispiel #11
0
    def confusion_matrix(self, metrics=None, thresholds=None):
        """
        Get the confusion matrix for the specified metrics and/or thresholds.

        :param metrics: A string (or list of strings) among metrics listed in :const:`max_metrics`. Defaults to 'f1'
            when neither ``metrics`` nor ``thresholds`` is given.
        :param thresholds: A value (or list of values) between 0 and 1. A list passed in is left unmodified.
        :returns: a list of ConfusionMatrix objects (if there are more than one to return), or a single ConfusionMatrix
            (if there is only one).
        :raises ValueError: if any of the requested metrics is not listed in :const:`max_metrics`.
        """
        # make lists out of metrics and thresholds arguments
        if metrics is None and thresholds is None:
            metrics = ['f1']

        if isinstance(metrics, list):
            metrics_list = metrics
        elif metrics is None:
            metrics_list = []
        else:
            metrics_list = [metrics]

        if isinstance(thresholds, list):
            # Work on a copy: the metric thresholds appended below must not end up in the caller's list.
            thresholds_list = list(thresholds)
        elif thresholds is None:
            thresholds_list = []
        else:
            thresholds_list = [thresholds]

        # error check the metrics_list and thresholds_list
        assert_is_type(thresholds_list, [numeric])
        assert_satisfies(thresholds_list, all(0 <= t <= 1 for t in thresholds_list))

        if not all(m.lower() in H2OBinomialModelMetrics.max_metrics for m in metrics_list):
            # Format the message explicitly; passing the list as a second argument would leave "{}" unfilled.
            raise ValueError("The only allowable metrics are {}"
                             .format(', '.join(H2OBinomialModelMetrics.max_metrics)))

        # make one big list that combines the thresholds and metric-thresholds: the explicitly requested
        # thresholds come first, followed by one threshold per requested metric (in the order of metrics_list)
        metrics_thresholds = [self.find_threshold_by_max_metric(m) for m in metrics_list]
        first_metrics_thresholds_offset = len(thresholds_list)
        thresholds_list.extend(metrics_thresholds)

        thresh2d = self._metric_json['thresholds_and_metric_scores']
        actual_thresholds = [float(e[0]) for e in thresh2d.cell_values]
        cms = []
        for i, t in enumerate(thresholds_list):
            idx = self.find_idx_by_threshold(t)
            row = thresh2d.cell_values[idx]
            # Columns 11-14 of the row are read as the tns / fns / fps / tps counts
            tns = row[11]
            fns = row[12]
            fps = row[13]
            tps = row[14]
            p = tps + fns
            n = tns + fps
            c0 = n - fps
            c1 = p - tps
            # Decide by position, not by value: for a user-supplied threshold that happens to equal a metric's
            # threshold, `i - first_metrics_thresholds_offset` would be negative and pick an unrelated metric.
            if i >= first_metrics_thresholds_offset:
                m = metrics_list[i - first_metrics_thresholds_offset]
                table_header = "Confusion Matrix (Act/Pred) for max {} @ threshold = {}".format(m, actual_thresholds[idx])
            else:
                table_header = "Confusion Matrix (Act/Pred) @ threshold = {}".format(actual_thresholds[idx])
            cms.append(ConfusionMatrix(cm=[[c0, fps], [c1, tps]], domains=self._metric_json['domain'],
                                       table_header=table_header))

        if len(cms) == 1:
            return cms[0]
        else:
            return cms
Beispiel #12
0
def test_asserts():
    """
    Test type-checking functionality.

    Exercises assert_is_type() with values that must be accepted and values that must be rejected, then
    checks assert_matches() and assert_satisfies().
    """
    def assert_error(*args, **kwargs):
        """
        Check that assert_is_type() with given arguments throws an error.

        Returns normally when H2OTypeError is raised; raises RuntimeError when no exception is thrown.
        """
        try:
            assert_is_type(*args, **kwargs)
            raise RuntimeError("Failed to throw an exception")
        except H2OTypeError as e:
            # Check whether the message can stringify properly
            message = str(e)
            assert len(message) < 1000
            return

    # A small diamond-shaped class hierarchy used by the class-based checks below
    class A(object): pass

    class B(A): pass

    class C(A): pass

    class D(B, C): pass


    # --- Values that must be accepted ---
    assert_is_type(3, int)
    assert_is_type(2**100, int)
    assert_is_type("3", str)
    assert_is_type(u"3", str)
    assert_is_type("foo", u"foo")
    assert_is_type(u"foo", "foo")
    assert_is_type("I", *list("ABCDEFGHIJKL"))
    assert_is_type(False, bool)
    assert_is_type(43, str, bool, int)
    assert_is_type(4 / 3, int, float)
    assert_is_type(None, None)
    assert_is_type(None, A, str, None)
    assert_is_type([], [float])
    assert_is_type([1, 4, 5], [int])
    assert_is_type([1.0, 2, 5], [int, float])
    assert_is_type([[2.0, 3.1, 0], [2, 4.4, 1.1], [-1, 0]], [[int, float]])
    assert_is_type([1, None, 2], [int, float, None])
    assert_is_type({1, 5, 1, 1, 3}, {int})
    assert_is_type({1, "hello", 3}, {int, str})
    assert_is_type({"foo": 1, "bar": 2}, {str: int})
    assert_is_type({"foo": 3, "bar": [5], "baz": None}, {str: U(int, None, [int])})
    assert_is_type({"foo": 1, "bar": 2}, {"foo": int, "bar": U(int, float, None), "baz": bool})
    assert_is_type((1, 3), (int, int))
    assert_is_type(("a", "b", "c"), (int, int, int), (str, str, str))
    assert_is_type([1, [2], [{3}]], [int, [int], [{3}]])
    assert_is_type(A(), None, A)
    assert_is_type(B(), None, A)
    assert_is_type(C(), A, B)
    assert_is_type(D(), I(A, B, C))
    assert_is_type(A, type)
    for a in range(-2, 5):
        assert_is_type(a, -2, -1, 0, 1, 2, 3, 4)
    assert_is_type(1, numeric)
    assert_is_type(2.2, numeric)
    assert_is_type(1, I(numeric, object))

    # --- Values that must be rejected with an H2OTypeError ---
    assert_error(3, str)
    assert_error("Z", *list("ABCDEFGHIJKL"))
    assert_error(u"Z", "a", "...", "z")
    assert_error("X", u"x")
    assert_error(0, bool)
    assert_error(0, float, str, bool, None)
    assert_error([1, 5], [float])
    assert_error((1, 3), (int, str), (str, int), (float, float))
    assert_error(A(), None, B)
    assert_error(A, A)
    assert_error({"foo": 1, "bar": "2"}, {"foo": int, "bar": U(int, float, None)})
    assert_error(3, 0, 2, 4)
    assert_error(None, numeric)
    assert_error("sss", numeric)
    assert_error(B(), I(A, B, C))
    assert_error(2, I(int, str))

    # --- assert_matches(): returns a match object whose groups can be inspected ---
    url_regex = r"^(https?)://((?:[\w-]+\.)*[\w-]+):(\d+)/?$"
    assert_matches("Hello, world!", r"^(\w+), (\w*)!$")
    assert_matches("http://127.0.0.1:3233/", url_regex)
    m = assert_matches("https://localhost:54321", url_regex)
    assert m.group(1) == "https"
    assert m.group(2) == "localhost"
    assert m.group(3) == "54321"

    # --- assert_satisfies() ---
    x = 5
    assert_satisfies(x, x < 1000)
    assert_satisfies(x, x ** x > 1000)
    assert_satisfies(url_regex, url_regex.lower() == url_regex)
    try:
        assert_satisfies(url_regex, url_regex.upper() == url_regex)
    except H2OValueError as e:
        # The error message is expected to quote the text of the condition that failed
        assert "url_regex.upper() == url_regex" in str(e), "Error message is bad: " + str(e)
    else:
        # Without this branch the test would pass silently if the false condition were accepted
        raise RuntimeError("Failed to throw an exception")
Beispiel #13
0
    def start(jar_path=None,
              nthreads=-1,
              enable_assertions=True,
              max_mem_size=None,
              min_mem_size=None,
              ice_root=None,
              port="54321+",
              extra_classpath=None,
              verbose=True):
        """
        Start new H2O server on the local machine.

        :param jar_path: Path to the h2o.jar executable. If not given, then we will search for h2o.jar in the
            locations returned by `._jar_paths()`.
        :param nthreads: Number of threads in the thread pool. This should be related to the number of CPUs used.
            -1 means use all CPUs on the host. A positive integer specifies the number of CPUs directly.
        :param enable_assertions: If True, pass `-ea` option to the JVM.
        :param max_mem_size: Maximum heap size (jvm option Xmx), in bytes.
        :param min_mem_size: Minimum heap size (jvm option Xms), in bytes.
        :param ice_root: A directory where H2O stores its temporary files. Default location is determined by
            tempfile.mkdtemp().
        :param port: Port where to start the new server. This could be either an integer, or a string of the form
            "DDDDD+", indicating that the server should start looking for an open port starting from DDDDD and up.
        :param extra_classpath: List of paths to libraries that should be included on the Java classpath.
        :param verbose: If True, then connection info will be printed to the stdout.

        :returns: a new H2OLocalServer instance
        :raises H2OValueError: if `min_mem_size` is larger than `max_mem_size`, or if `port` is a string that
            is neither all digits nor of the form "DDDD+".
        """
        assert_is_type(jar_path, None, str)
        assert_is_type(port, None, int, str)
        assert_is_type(nthreads, -1, BoundInt(1, 4096))
        assert_is_type(enable_assertions, bool)
        assert_is_type(min_mem_size, None, int)
        assert_is_type(max_mem_size, None, BoundInt(1 << 25))
        assert_is_type(ice_root, None, I(str, os.path.isdir))
        assert_is_type(extra_classpath, None, [str])
        if jar_path:
            assert_satisfies(jar_path, jar_path.endswith("h2o.jar"))

        if min_mem_size is not None and max_mem_size is not None and min_mem_size > max_mem_size:
            raise H2OValueError(
                "`min_mem_size`=%d is larger than the `max_mem_size`=%d" %
                (min_mem_size, max_mem_size))
        if port is None: port = "54321+"
        baseport = None
        # TODO: get rid of this port gimmick and have 2 separate parameters.
        if is_type(port, str):
            if port.isdigit():
                port = int(port)
            else:
                # `endswith` rather than `port[-1]`: an empty string must be reported as a bad value instead
                # of failing with an IndexError.
                if not (port.endswith("+") and port[:-1].isdigit()):
                    raise H2OValueError(
                        "`port` should be of the form 'DDDD+', where D is a digit. Got: %s"
                        % port)
                # "DDDD+" form: the number is handed over as `baseport`, and `port` itself is set to 0
                baseport = int(port[:-1])
                port = 0

        hs = H2OLocalServer()
        hs._verbose = bool(verbose)
        hs._jar_path = hs._find_jar(jar_path)
        hs._extra_classpath = extra_classpath
        hs._ice_root = ice_root
        if not ice_root:
            # No directory given: create a temporary one, and remember it separately in `_tempdir`
            hs._ice_root = tempfile.mkdtemp()
            hs._tempdir = hs._ice_root

        if verbose: print("Attempting to start a local H2O server...")
        hs._launch_server(port=port,
                          baseport=baseport,
                          nthreads=int(nthreads),
                          ea=enable_assertions,
                          mmax=max_mem_size,
                          mmin=min_mem_size)
        if verbose:
            print("  Server is running at %s://%s:%d" %
                  (hs.scheme, hs.ip, hs.port))
        # Shut the server down when the interpreter exits
        atexit.register(lambda: hs.shutdown())
        return hs
Beispiel #14
0
def test_asserts():
    """
    Test type-checking functionality.

    Exercises assert_is_type() with values that must be accepted and values that must be rejected, then
    checks assert_matches() and assert_satisfies(), and finally the pandas / numpy type markers when those
    packages can be imported.
    """
    def assert_error(*args, **kwargs):
        """
        Check that assert_is_type() with given arguments throws an error.

        Returns normally when H2OTypeError is raised; raises RuntimeError when no exception is thrown.
        """
        try:
            assert_is_type(*args, **kwargs)
            raise RuntimeError("Failed to throw an exception")
        except H2OTypeError as exc:
            # Check whether the message can stringify properly
            message = str(exc)
            assert len(message) < 1000
            return

    class A(object):
        """Dummy A."""

    class B(A):
        """Dummy B."""

    class C(A):
        """Dummy C."""

    class D(B, C):
        """Dummy D."""

    # --- Values that must be accepted ---
    assert_is_type(3, int)
    assert_is_type(2**100, int)
    assert_is_type("3", str)
    assert_is_type(u"3", str)
    assert_is_type("foo", u"foo")
    assert_is_type(u"foo", "foo")
    assert_is_type("I", *list("ABCDEFGHIJKL"))
    assert_is_type(False, bool)
    assert_is_type(43, str, bool, int)
    assert_is_type(4 / 3, int, float)
    assert_is_type(None, None)
    assert_is_type(None, A, str, None)
    assert_is_type([], [float])
    assert_is_type([1, 4, 5], [int])
    assert_is_type([1.0, 2, 5], [int, float])
    assert_is_type([[2.0, 3.1, 0], [2, 4.4, 1.1], [-1, 0]], [[int, float]])
    assert_is_type([1, None, 2], [int, float, None])
    assert_is_type({1, 5, 1, 1, 3}, {int})
    assert_is_type({1, "hello", 3}, {int, str})
    assert_is_type({"foo": 1, "bar": 2}, {str: int})
    assert_is_type({"foo": 3, "bar": [5], "baz": None}, {str: U(int, None, [int])})
    assert_is_type({"foo": 1, "bar": 2}, {"foo": int, "bar": U(int, float, None), "baz": bool})
    assert_is_type({}, {"spam": int, "egg": int})
    assert_is_type({"spam": 10}, {"spam": int, "egg": int})
    assert_is_type({"egg": 1}, {"spam": int, "egg": int})
    assert_is_type({"egg": 1, "spam": 10}, {"spam": int, "egg": int})
    assert_is_type({"egg": 1, "spam": 10}, Dict(egg=int, spam=int))
    assert_is_type({"egg": 1, "spam": 10}, Dict(egg=int, spam=int, ham=U(int, None)))
    assert_is_type((1, 3), (int, int))
    assert_is_type(("a", "b", "c"), (int, int, int), (str, str, str))
    assert_is_type((1, 3, 4, 7, 11, 18), Tuple(int))
    assert_is_type((1, 3, "spam", 3, "egg"), Tuple(int, str))
    assert_is_type([1, [2], [{3}]], [int, [int], [{3}]])
    assert_is_type(A(), None, A)
    assert_is_type(B(), None, A)
    assert_is_type(C(), A, B)
    assert_is_type(D(), I(A, B, C))
    assert_is_type(A, type)
    assert_is_type(B, lambda aa: issubclass(aa, A))
    for a in range(-2, 5):
        assert_is_type(a, -2, -1, 0, 1, 2, 3, 4)
    assert_is_type(1, numeric)
    assert_is_type(2.2, numeric)
    assert_is_type(1, I(numeric, object))
    assert_is_type(34, I(int, NOT(0)))
    assert_is_type(["foo", "egg", "spaam"], [I(str, NOT("spam"))])
    assert_is_type(H2OFrame(), h2oframe)
    assert_is_type([[2.0, 3.1, 0], [2, 4.4, 1.1], [-1, 0, 0]],
                   I([[numeric]], lambda v: all(len(vi) == len(v[0]) for vi in v)))
    assert_is_type([None, None, float('nan'), None, "N/A"], [None, "N/A", I(float, math.isnan)])

    # --- Values that must be rejected with an H2OTypeError ---
    assert_error(3, str)
    assert_error(0, float)
    assert_error("Z", *list("ABCDEFGHIJKL"))
    assert_error(u"Z", "a", "...", "z")
    assert_error("X", u"x")
    assert_error(0, bool)
    assert_error(0, float, str, bool, None)
    assert_error([1, 5], [float])
    assert_error((1, 3), (int, str), (str, int), (float, float))
    assert_error(A(), None, B)
    assert_error(A, A)
    assert_error(A, lambda aa: issubclass(aa, B))
    assert_error(135, I(int, lambda x: 0 <= x <= 100))
    assert_error({"foo": 1, "bar": "2"}, {"foo": int, "bar": U(int, float, None)})
    assert_error(3, 0, 2, 4)
    assert_error(None, numeric)
    assert_error("sss", numeric)
    assert_error(B(), I(A, B, C))
    assert_error(2, I(int, str))
    assert_error(0, I(int, NOT(0)))
    assert_error(None, NOT(None))
    assert_error((1, 3, "2", 3), Tuple(int))
    assert_error({"spam": 10}, Dict(spam=int, egg=int))
    assert_error({"egg": 5}, Dict(spam=int, egg=int))
    assert_error(False, h2oframe, pandas_dataframe, numpy_ndarray)
    assert_error([[2.0, 3.1, 0], [2, 4.4, 1.1], [-1, 0]],
                 I([[numeric]], lambda v: all(len(vi) == len(v[0]) for vi in v)))
    try:
        # Cannot use `assert_error` here because typechecks module cannot detect args in (*args, *kwargs)
        assert_is_type(10000000, I(int, lambda port: 1 <= port <= 65535))
        assert False, "Failed to throw an exception"
    except H2OTypeError as e:
        # The message is expected to spell out the lambda's condition
        assert "integer & 1 <= port <= 65535" in str(e), "Bad error message: '%s'" % e

    # --- assert_matches(): returns a match object whose groups can be inspected ---
    url_regex = r"^(https?)://((?:[\w-]+\.)*[\w-]+):(\d+)/?$"
    assert_matches("Hello, world!", r"^(\w+), (\w*)!$")
    assert_matches("http://127.0.0.1:3233/", url_regex)
    m = assert_matches("https://localhost:54321", url_regex)
    assert m.group(1) == "https"
    assert m.group(2) == "localhost"
    assert m.group(3) == "54321"

    # --- assert_satisfies() ---
    x = 5
    assert_satisfies(x, x < 1000)
    assert_satisfies(x, x ** x > 1000)
    assert_satisfies(url_regex, url_regex.lower() == url_regex)
    try:
        assert_satisfies(url_regex, url_regex.upper() == url_regex)
    except H2OValueError as e:
        # The error message is expected to quote the text of the condition that failed
        assert "url_regex.upper() == url_regex" in str(e), "Error message is bad: " + str(e)
    else:
        # Without this branch the test would pass silently if the false condition were accepted
        raise RuntimeError("Failed to throw an exception")

    # The pandas / numpy markers can only be checked when both packages are installed
    try:
        import pandas
        import numpy
        assert_is_type(pandas.DataFrame(), pandas_dataframe)
        assert_is_type(numpy.ndarray(shape=(5,)), numpy_ndarray)
    except ImportError:
        pass
Beispiel #15
0
    def start(jar_path=None, nthreads=-1, enable_assertions=True, max_mem_size=None, min_mem_size=None,
              ice_root=None, port="54321+", verbose=True):
        """
        Start new H2O server on the local machine.

        :param jar_path: Path to the h2o.jar executable. If not given, then we will search for h2o.jar in the
            locations returned by `._jar_paths()`.
        :param nthreads: Number of threads in the thread pool. This should be related to the number of CPUs used.
            -1 means use all CPUs on the host. A positive integer (at most 4096) specifies the number of CPUs
            directly.
        :param enable_assertions: If True, pass `-ea` option to the JVM.
        :param max_mem_size: Maximum heap size (jvm option Xmx), in bytes. Must be at least ``1 << 25``.
        :param min_mem_size: Minimum heap size (jvm option Xms), in bytes. Must not exceed `max_mem_size`.
        :param ice_root: A directory where H2O stores its temporary files. Must be an existing directory. Default
            location is determined by tempfile.mkdtemp().
        :param port: Port where to start the new server. This could be either an integer, or a string of the form
            "DDDDD+", indicating that the server should start looking for an open port starting from DDDDD and up.
            A string consisting only of digits is treated as the corresponding integer.
        :param verbose: If True, then connection info will be printed to the stdout.

        :returns: a new H2OLocalServer instance
        :raises AssertionError: if `nthreads`, `max_mem_size`, `min_mem_size`, `ice_root` or a string `port` fails
            the range / format checks performed below.
        """
        assert_is_type(jar_path, None, str)
        assert_is_type(port, None, int, str)
        assert_is_type(nthreads, int)
        assert_is_type(enable_assertions, bool)
        assert_is_type(min_mem_size, None, int)
        assert_is_type(max_mem_size, None, int)
        assert_is_type(ice_root, None, str)
        if jar_path:
            assert_satisfies(jar_path, jar_path.endswith("h2o.jar"))

        # NOTE(review): the checks below use `assert`, so they are skipped when Python runs with -O. They are kept
        # as asserts so that callers which catch AssertionError continue to work.
        assert nthreads == -1 or 1 <= nthreads <= 4096, "`nthreads` is out of bounds: %d" % nthreads
        assert max_mem_size is None or max_mem_size >= 1 << 25, "`max_mem_size` too small: %d" % max_mem_size
        assert min_mem_size is None or max_mem_size is None or min_mem_size <= max_mem_size, \
            "`min_mem_size`=%d is larger than the `max_mem_size`=%d" % (min_mem_size, max_mem_size)
        if ice_root:
            assert os.path.isdir(ice_root), "`ice_root` is not a valid directory: %s" % ice_root
        if port is None: port = "54321+"
        baseport = None
        if is_type(port, str):
            if port.isdigit():
                port = int(port)
            else:
                # `endswith` (rather than `port[-1]`) keeps an empty string from raising a bare IndexError: it now
                # fails this check with the descriptive message instead.
                assert port.endswith("+") and port[:-1].isdigit(), \
                    "`port` should be of the form 'DDDD+', where D is a digit. Got: %s" % port
                # "DDDD+" form: pass the starting port as `baseport` and 0 as the fixed `port`.
                baseport = int(port[:-1])
                port = 0

        hs = H2OLocalServer()
        hs._verbose = bool(verbose)
        hs._jar_path = hs._find_jar(jar_path)
        hs._ice_root = ice_root
        if not ice_root:
            # No directory supplied: create a temporary one and also record it in `_tempdir`.
            hs._ice_root = tempfile.mkdtemp()
            hs._tempdir = hs._ice_root

        if verbose: print("Attempting to start a local H2O server...")
        hs._launch_server(port=port, baseport=baseport, nthreads=int(nthreads), ea=enable_assertions,
                          mmax=max_mem_size, mmin=min_mem_size)
        if verbose: print("  Server is running at %s://%s:%d" % (hs.scheme, hs.ip, hs.port))
        # Make sure the server is shut down when the interpreter exits.
        atexit.register(lambda: hs.shutdown())
        return hs
Beispiel #16
0
    def open(server=None, url=None, ip=None, port=None, https=None, auth=None, verify_ssl_certificates=True,
             proxy=None, cluster_name=None, verbose=True, _msgs=None):
        r"""
        Establish connection to an existing H2O server.

        The connection is not kept alive, so what this method actually does is it attempts to connect to the
        specified server, and checks that the server is healthy and responds to REST API requests. If the H2O server
        cannot be reached, an :class:`H2OConnectionError` will be raised. On success this method returns a new
        :class:`H2OConnection` object, and it is the only "official" way to create instances of this class.

        There are 3 ways to specify the target to connect to (these settings are mutually exclusive):

            * pass a ``server`` option,
            * pass the full ``url`` for the connection,
            * provide a triple of parameters ``ip``, ``port``, ``https``.

        :param H2OLocalServer server: connect to the specified local server instance. There is a slight difference
            between connecting to a local server by specifying its ip and port, and connecting through
            an H2OLocalServer instance: if the server becomes unresponsive, then having access to its process handle
            will allow us to query the server status through OS, and potentially provide snapshot of the server's
            error log in the exception information.
        :param url: full url of the server to connect to, of the form ``http[s]://host:port[/]``.
        :param ip: target server's IP address or hostname (default "localhost").
        :param port: H2O server's port (default 54321). A string of digits is converted to an integer.
        :param https: if True then connect using https instead of http (default False).
        :param verify_ssl_certificates: if False then SSL certificate checking will be disabled (default True). This
            setting should rarely be disabled, as it makes your connection vulnerable to man-in-the-middle attacks. When
            used, it will generate a warning from the requests library. Has no effect when ``https`` is False.
        :param auth: authentication token for connecting to the remote server. This can be either a
            (username, password) tuple, or an authenticator (AuthBase) object. Please refer to the documentation in
            the ``requests.auth`` module.
        :param proxy: url address of a proxy server. If you do not specify the proxy, then the requests module
            will attempt to use a proxy specified in the environment (in HTTP_PROXY / HTTPS_PROXY variables). We
            check for the presence of these variables and issue a warning if they are found. In order to suppress
            that warning and use proxy from the environment, pass ``proxy="(default)"``.
        :param cluster_name: name of the H2O cluster to connect to. This option is used from Steam only.
        :param verbose: if True, then connection progress info will be printed to the stdout.
        :param _msgs: custom messages to display during connection. This is a tuple (initial message, success message,
            failure message).

        :returns: A new :class:`H2OConnection` instance.
        :raises H2OConnectionError: if the server cannot be reached.
        :raises H2OServerError: if the server is in an unhealthy state (although this might be a recoverable error, the
            client itself should decide whether it wants to retry or not).
        """
        if server is not None:
            assert_is_type(server, H2OLocalServer)
            assert_is_type(ip, None, "`ip` should be None when `server` parameter is supplied")
            # Fixed copy-paste slip: this check is about `url`, so the text must name `url`, not `ip`.
            assert_is_type(url, None, "`url` should be None when `server` parameter is supplied")
            if not server.is_running():
                raise H2OConnectionError("Unable to connect to server because it is not running")
            ip = server.ip
            port = server.port
            scheme = server.scheme
        elif url is not None:
            assert_is_type(url, str)
            assert_is_type(ip, None, "`ip` should be None when `url` parameter is supplied")
            # We don't allow any Unicode characters in the URL. Maybe some day we will...
            match = assert_matches(url, r"^(https?)://((?:[\w-]+\.)*[\w-]+):(\d+)/?$")
            scheme = match.group(1)
            ip = match.group(2)
            port = int(match.group(3))
        else:
            # Neither `server` nor `url` given: fall back to the (ip, port, https) triple with defaults.
            if ip is None: ip = str("localhost")
            if port is None: port = 54321
            if https is None: https = False
            if is_str(port) and port.isdigit(): port = int(port)
            assert_is_type(ip, str)
            assert_is_type(port, int)
            assert_is_type(https, bool)
            assert_matches(ip, r"(?:[\w-]+\.)*[\w-]+")
            assert_satisfies(port, 1 <= port <= 65535)
            scheme = "https" if https else "http"

        if verify_ssl_certificates is None: verify_ssl_certificates = True
        assert_is_type(verify_ssl_certificates, bool)
        assert_is_type(proxy, str, None)
        assert_is_type(auth, AuthBase, (str, str), None)
        assert_is_type(cluster_name, str, None)
        assert_is_type(_msgs, None, (str, str, str))

        conn = H2OConnection()
        conn._verbose = bool(verbose)
        conn._local_server = server
        conn._base_url = "%s://%s:%d" % (scheme, ip, port)
        conn._verify_ssl_cert = bool(verify_ssl_certificates)
        conn._auth = auth
        conn._cluster_name = cluster_name
        conn._proxies = None
        if proxy and proxy != "(default)":
            conn._proxies = {scheme: proxy}
        elif not proxy:
            # Give user a warning if there are any "*_proxy" variables in the environment. [PUBDEV-2504]
            # To suppress the warning pass proxy = "(default)".
            for name in os.environ:
                if name.lower() == scheme + "_proxy":
                    warn("Proxy is defined in the environment: %s. "
                         "This may interfere with your H2O Connection." % os.environ[name])

        try:
            # Allow more connection attempts when we were handed a local server instance.
            retries = 20 if server else 5
            conn._stage = 1
            conn._timeout = 3.0
            conn._cluster_info = conn._test_connection(retries, messages=_msgs)
            # If a server is unable to respond within 1s, it should be considered a bug. However we disable this
            # setting for now, for no good reason other than to ignore all those bugs :(
            conn._timeout = None
            # This is a good one! On the surface it registers a callback to be invoked when the script is about
            # to finish, but it also has a side effect in that the reference to current connection will be held
            # by the ``atexit`` service till the end -- which means it will never be garbage-collected.
            atexit.register(lambda: conn.close())
        except Exception:
            # Reset _stage so that we know the connection was not initialized properly.
            conn._stage = 0
            raise
        return conn
Beispiel #17
0
    def start(jar_path=None, nthreads=-1, enable_assertions=True, max_mem_size=None, min_mem_size=None,
              ice_root=None, log_dir=None, log_level=None, port="54321+", name=None, extra_classpath=None,
              verbose=True, jvm_custom_args=None, bind_to_localhost=True):
        """
        Start new H2O server on the local machine.

        :param jar_path: Path to the h2o.jar executable. If not given, then we will search for h2o.jar in the
            locations returned by `._jar_paths()`.
        :param nthreads: Number of threads in the thread pool. This should be related to the number of CPUs used.
            -1 means use all CPUs on the host. A positive integer specifies the number of CPUs directly.
        :param enable_assertions: If True, pass `-ea` option to the JVM.
        :param max_mem_size: Maximum heap size (jvm option Xmx), in bytes.
        :param min_mem_size: Minimum heap size (jvm option Xms), in bytes.
        :param log_dir: Directory for H2O logs to be stored if a new instance is started. Default directory is
            determined by H2O internally.
        :param log_level: The logger level for H2O if a new instance is started. One of "TRACE", "DEBUG", "INFO",
            "WARN", "ERRR", "FATA", or None.
        :param ice_root: A directory where H2O stores its temporary files. Default location is determined by
            tempfile.mkdtemp().
        :param port: Port where to start the new server. This could be either an integer, or a string of the form
            "DDDDD+", indicating that the server should start looking for an open port starting from DDDDD and up.
        :param name: name of the h2o cluster to be started.
        :param extra_classpath: List of paths to libraries that should be included on the Java classpath.
        :param verbose: If True, then connection info will be printed to the stdout.
        :param jvm_custom_args: Custom, user-defined arguments for the JVM H2O is instantiated in.
        :param bind_to_localhost: A flag indicating whether access to the H2O instance should be restricted to the
            local machine (default) or if it can be reached from other computers on the network.
            Only applicable when H2O is started from the Python client.

        :returns: a new H2OLocalServer instance
        :raises H2OValueError: if `min_mem_size` exceeds `max_mem_size`, or if a string `port` is neither all digits
            nor of the form "DDDD+".
        """
        assert_is_type(jar_path, None, str)
        assert_is_type(port, None, int, str)
        assert_is_type(name, None, str)
        assert_is_type(nthreads, -1, BoundInt(1, 4096))
        assert_is_type(enable_assertions, bool)
        assert_is_type(min_mem_size, None, int)
        assert_is_type(max_mem_size, None, BoundInt(1 << 25))
        assert_is_type(log_dir, str, None)
        assert_is_type(log_level, str, None)
        assert_satisfies(log_level, log_level in [None, "TRACE", "DEBUG", "INFO", "WARN", "ERRR", "FATA"])
        assert_is_type(ice_root, None, I(str, os.path.isdir))
        assert_is_type(extra_classpath, None, [str])
        assert_is_type(jvm_custom_args, list, None)
        assert_is_type(bind_to_localhost, bool)
        if jar_path:
            assert_satisfies(jar_path, jar_path.endswith("h2o.jar"))

        if min_mem_size is not None and max_mem_size is not None and min_mem_size > max_mem_size:
            raise H2OValueError("`min_mem_size`=%d is larger than the `max_mem_size`=%d" % (min_mem_size, max_mem_size))
        if port is None: port = "54321+"
        baseport = None
        # TODO: get rid of this port gimmick and have 2 separate parameters.
        if is_type(port, str):
            if port.isdigit():
                port = int(port)
            else:
                # `endswith` (rather than `port[-1]`) keeps an empty string from raising a bare IndexError: it is
                # now reported through the same H2OValueError as any other malformed port.
                if not (port.endswith("+") and port[:-1].isdigit()):
                    raise H2OValueError("`port` should be of the form 'DDDD+', where D is a digit. Got: %s" % port)
                # "DDDD+" form: pass the starting port as `baseport` and 0 as the fixed `port`.
                baseport = int(port[:-1])
                port = 0

        hs = H2OLocalServer()
        hs._verbose = bool(verbose)
        hs._jar_path = hs._find_jar(jar_path)
        hs._extra_classpath = extra_classpath
        hs._ice_root = ice_root
        hs._name = name
        if not ice_root:
            # No directory supplied: create a temporary one and also record it in `_tempdir`.
            hs._ice_root = tempfile.mkdtemp()
            hs._tempdir = hs._ice_root

        if verbose: print("Attempting to start a local H2O server...")
        hs._launch_server(port=port, baseport=baseport, nthreads=int(nthreads), ea=enable_assertions,
                          mmax=max_mem_size, mmin=min_mem_size, jvm_custom_args=jvm_custom_args,
                          bind_to_localhost=bind_to_localhost, log_dir=log_dir, log_level=log_level)
        if verbose: print("  Server is running at %s://%s:%d" % (hs.scheme, hs.ip, hs.port))
        # Make sure the server is shut down when the interpreter exits.
        atexit.register(lambda: hs.shutdown())
        return hs
Beispiel #18
0
    def confusion_matrix(self, metrics=None, thresholds=None):
        """
        Get the confusion matrix for the specified metrics and/or thresholds.

        When neither argument is given, the confusion matrix for the max-"f1" threshold is returned.

        :param metrics: A string (or list of strings) in {"min_per_class_accuracy", "absolute_mcc", "precision",
            "recall", "specificity", "accuracy", "f0point5", "f2", "f1", "mean_per_class_accuracy"}. Names are
            validated case-insensitively. For every metric, the threshold returned by
            ``find_threshold_by_max_metric`` is used.
        :param thresholds: A value (or list of values) between 0 and 1. The list passed by the caller is not
            modified.
        :returns: a list of ConfusionMatrix objects (if there are more than one to return), or a single ConfusionMatrix
            (if there is only one). Matrices for explicit thresholds come first, followed by one per metric.
        :raises ValueError: if any of the requested metrics is not in the allowed set.
        """
        allowed_metrics = ["min_per_class_accuracy", "absolute_mcc", "precision", "recall", "specificity",
                           "accuracy", "f0point5", "f2", "f1", "mean_per_class_accuracy"]

        # make lists out of metrics and thresholds arguments
        if metrics is None and thresholds is None: metrics = ["f1"]

        if isinstance(metrics, list):
            metrics_list = metrics
        elif metrics is None:
            metrics_list = []
        else:
            metrics_list = [metrics]

        if isinstance(thresholds, list):
            # Work on a copy so that the caller's list is never mutated.
            thresholds_list = list(thresholds)
        elif thresholds is None:
            thresholds_list = []
        else:
            thresholds_list = [thresholds]

        # error check the metrics_list and thresholds_list
        assert_is_type(thresholds_list, [numeric])
        assert_satisfies(thresholds_list, all(0 <= t <= 1 for t in thresholds_list))

        if not all(m.lower() in allowed_metrics for m in metrics_list):
            raise ValueError(
                "The only allowable metrics are min_per_class_accuracy, absolute_mcc, precision, recall, "
                "specificity, accuracy, f0point5, f2, f1, mean_per_class_accuracy")

        # Pair every threshold with the metric it was derived from (None for explicitly supplied thresholds).
        # Keeping the pairing explicit avoids mislabelling a user threshold that happens to equal a metric's
        # threshold, or two metrics that share the same threshold.
        requests = [(t, None) for t in thresholds_list]
        requests.extend((self.find_threshold_by_max_metric(m), m) for m in metrics_list)

        thresh2d = self._metric_json['thresholds_and_metric_scores']
        cms = []
        for t, m in requests:
            idx = self.find_idx_by_threshold(t)
            row = thresh2d.cell_values[idx]
            # Column 0 holds the threshold of this row; columns 11..14 hold tns, fns, fps, tps respectively.
            actual_threshold = float(row[0])
            tns, fns, fps, tps = row[11], row[12], row[13], row[14]
            if m is not None:
                table_header = "Confusion Matrix (Act/Pred) for max " + m + " @ threshold = " + str(
                    actual_threshold)
            else:
                table_header = "Confusion Matrix (Act/Pred) @ threshold = " + str(actual_threshold)
            # Rows are the actual class, columns the predicted class: [[TN, FP], [FN, TP]].
            cms.append(ConfusionMatrix(cm=[[tns, fps], [fns, tps]], domains=self._metric_json['domain'],
                                       table_header=table_header))

        if len(cms) == 1:
            return cms[0]
        else:
            return cms