Esempio n. 1
0
def h2o_plot(model, model_file_prefix):
    """
    Render tree 0 of an H2O model to Graphviz and PNG files.

    Adapted from:
    https://resources.oreilly.com/oriole/interpretable-machine-learning-with-python-xgboost-and-h2o/blob/master/dt_surrogate_loco.ipynb

    The model's MOJO is downloaded, ``hex.genmodel.tools.PrintMojo`` is run
    through ``java`` to write ``<model_file_prefix>.gv``, and ``dot`` is then
    run to write ``<model_file_prefix>.png``. The exit codes of both external
    commands are ignored.

    :param model: object exposing ``download_mojo()``; its return value is
        passed to PrintMojo as the input (``-i``) path.
    :param model_file_prefix: path prefix (without extension) used for the
        generated ``.gv`` and ``.png`` files.
    :return: number of occurrences of ``shape=box`` in the generated
        Graphviz file, used here as the complexity measure of the tree.
    """
    mojo_path = model.download_mojo()

    # The PrintMojo tool ships inside the h2o jar; locate that jar.
    hs = H2OLocalServer()
    h2o_jar_path = hs._find_jar()

    # Build argv as a list instead of splitting a concatenated string, so
    # that paths containing whitespace stay single arguments.
    gv_file_name = model_file_prefix + '.gv'
    gv_args = [
        'java', '-cp', h2o_jar_path,
        'hex.genmodel.tools.PrintMojo',
        '--tree', '0',
        '--decimalplaces', '2',
        '-i', mojo_path,
        '-o', gv_file_name,
    ]
    subprocess.call(gv_args)

    # Compute complexity of the tree from the resulting graphviz source.
    # The context manager guarantees the file handle is closed.
    with open(gv_file_name, 'r') as gv_file:
        complexity = gv_file.read().count("shape=box")

    png_file_name = model_file_prefix + '.png'
    png_args = ['dot', '-Tpng', gv_file_name, '-o', png_file_name]
    subprocess.call(png_args)

    return complexity
Esempio n. 2
0
def init(url=None, ip=None, port=None, https=None, insecure=False, username=None, password=None, cluster_name=None,
         proxy=None, start_h2o=True, nthreads=-1, ice_root=None, enable_assertions=True,
         max_mem_size=None, min_mem_size=None, strict_version_check=True, **kwargs):
    """
    Connect to an H2O server, starting a local one if the connection fails.

    A connection is first attempted through ``connect()``. If that raises
    ``H2OConnectionError`` and ``start_h2o`` is true, a local server is
    started with ``H2OLocalServer.start()`` and a connection to it is stored
    in the module-level ``h2oconn``.

    :param url: forwarded to ``connect()``.
    :param ip: forwarded to ``connect()``; a value other than "localhost" /
        "127.0.0.1" together with ``start_h2o`` triggers a printed warning.
    :param port: forwarded to ``connect()``; when a local server has to be
        started it is treated as a base port (a trailing "+" is appended).
    :param https: selects the "https" scheme (otherwise "http") for the proxy
        lookup and is forwarded to the connection calls.
    :param insecure: negated and passed as ``verify_ssl_certificates``.
    :param username: used with ``password`` to build the ``auth`` tuple; auth
        is ``None`` unless both are truthy.
    :param password: see ``username``.
    :param cluster_name: forwarded to the connection calls.
    :param proxy: container indexed by scheme; the entry for the active scheme
        is used if present.
    :param start_h2o: when false, a failed connection is re-raised instead of
        starting a local server.
    :param nthreads: forwarded to ``H2OLocalServer.start()``.
    :param ice_root: forwarded to ``H2OLocalServer.start()``.
    :param enable_assertions: forwarded to ``H2OLocalServer.start()``.
    :param max_mem_size: converted with ``int()`` and forwarded to
        ``H2OLocalServer.start()``.
    :param min_mem_size: converted with ``int()`` and forwarded to
        ``H2OLocalServer.start()``.
    :param strict_version_check: when true, ``version_check()`` is called.
    :param kwargs: deprecated attributes; ``proxies``, ``max_mem_size_GB`` and
        ``min_mem_size_GB`` are consulted as fallbacks.
    :returns: nothing
    """
    global h2oconn

    scheme = "https" if https else "http"

    # Explicit `proxy` wins; the deprecated `proxies` kwarg is the fallback.
    if proxy is not None and scheme in proxy:
        proxy = proxy[scheme]
    elif "proxies" in kwargs and scheme in kwargs["proxies"]:
        proxy = kwargs["proxies"][scheme]
    else:
        proxy = None

    # Deprecated *_GB kwargs are shifted left by 30 bits (multiplied by 2**30).
    if max_mem_size is not None:
        mmax = int(max_mem_size)
    elif "max_mem_size_GB" in kwargs:
        mmax = kwargs["max_mem_size_GB"] << 30
    else:
        mmax = None

    if min_mem_size is not None:
        mmin = int(min_mem_size)
    elif "min_mem_size_GB" in kwargs:
        mmin = kwargs["min_mem_size_GB"] << 30
    else:
        mmin = None

    auth = None
    if username and password:
        auth = (username, password)

    if not start_h2o:
        print("Warning: if you don't want to start local H2O server, then use of `h2o.connect()` is preferred.")
    if ip and ip != "localhost" and ip != "127.0.0.1" and start_h2o:
        print("Warning: connecting to remote server but falling back to local... Did you mean to use `h2o.connect()`?")

    verify_ssl = not insecure
    try:
        connect(url=url, ip=ip, port=port, https=https, verify_ssl_certificates=verify_ssl,
                auth=auth, proxy=proxy, cluster_name=cluster_name, verbose=True)
    except H2OConnectionError:
        # Backward compatibility: in init() port parameter really meant "baseport" when starting a local server...
        if port and not str(port).endswith("+"):
            port = str(port) + "+"
        if not start_h2o:
            raise
        hs = H2OLocalServer.start(
            nthreads=nthreads,
            enable_assertions=enable_assertions,
            max_mem_size=mmax,
            min_mem_size=mmin,
            ice_root=ice_root,
            port=port,
        )
        h2oconn = H2OConnection.open(
            server=hs,
            https=https,
            verify_ssl_certificates=verify_ssl,
            auth=auth,
            proxy=proxy,
            cluster_name=cluster_name,
            verbose=True,
        )

    if strict_version_check:
        version_check()

    if h2oconn.info().build_too_old:
        print("Warning: Your H2O cluster version is too old ({})! Please download and install the latest version from http://h2o.ai/download/".format(h2oconn.info().build_age))
Esempio n. 3
0
def get_gv(title, model_id, mojo_path):
    """ Utility function to generate graphviz dot file from h2o MOJO using
        a subprocess.

    The output is written to ``<model_id>.gv``. The constructed command line
    is printed before it is executed, and the exit code of the external
    process is discarded.

    Args:
        title: Title for displayed decision tree; when None, no ``--title``
               option is passed to the tool.
        model_id: h2o model identifier; used as the stem of the output
                  ``.gv`` file name.
        mojo_path: Path to saved model MOJO (Java scoring artifact), passed
                   to the tool as its input (``-i``).

    Returns:
        None.
    """

    # locate h2o jar
    hs = H2OLocalServer()
    h2o_jar_path = hs._find_jar()
    print('Discovered H2O jar path:\n', h2o_jar_path)

    # construct command line call to generate graphviz version of
    # tree, see for more information:
    # http://docs.h2o.ai/h2o/latest-stable/h2o-genmodel/javadoc/index.html
    #
    # The argument vector is built as a list (not by splitting a
    # concatenated string) so that a jar or MOJO path containing whitespace
    # is passed to java as a single argument.
    gv_file_name = model_id + '.gv'
    gv_args = [
        'java', '-cp', h2o_jar_path,
        'hex.genmodel.tools.PrintMojo',
        '--tree', '0',
        '-i', mojo_path,
        '-o', gv_file_name,
    ]
    if title is not None:
        gv_args += ['--title', title]

    # call constructed command
    print()
    print('Calling external process ...')
    print(' '.join(gv_args))
    _ = subprocess.call(gv_args)
Esempio n. 4
0
def init(url=None,
         ip=None,
         port=None,
         https=None,
         insecure=False,
         username=None,
         password=None,
         cluster_name=None,
         proxy=None,
         start_h2o=True,
         nthreads=-1,
         ice_root=None,
         enable_assertions=True,
         max_mem_size=None,
         min_mem_size=None,
         strict_version_check=True,
         **kwargs):
    """
    Connect to an H2O server, starting a local one if the connection fails.

    A connection is first attempted with ``H2OConnection.open()``. If that
    raises ``H2OConnectionError`` and ``start_h2o`` is true, a local server
    is started with ``H2OLocalServer.start()`` and connected to instead. The
    resulting connection is stored in the module-level ``h2oconn``.

    :param url: forwarded to the first ``H2OConnection.open()`` attempt.
    :param ip: forwarded to the first attempt; a value other than "localhost"
        / "127.0.0.1" together with ``start_h2o`` triggers a printed warning.
    :param port: forwarded to the first attempt; when a local server has to
        be started it is treated as a base port (a trailing "+" is appended).
    :param https: selects the "https" scheme (otherwise "http") for the proxy
        lookup and is forwarded to the connection calls.
    :param insecure: negated and passed as ``verify_ssl_certificates``.
    :param username: used with ``password`` to build the ``auth`` tuple; auth
        is ``None`` unless both are truthy.
    :param password: see ``username``.
    :param cluster_name: forwarded to the connection calls.
    :param proxy: container indexed by scheme; the entry for the active scheme
        is used if present.
    :param start_h2o: when false, a failed connection is re-raised instead of
        starting a local server.
    :param nthreads: forwarded to ``H2OLocalServer.start()``.
    :param ice_root: forwarded to ``H2OLocalServer.start()``.
    :param enable_assertions: forwarded to ``H2OLocalServer.start()``.
    :param max_mem_size: converted with ``int()`` and forwarded to
        ``H2OLocalServer.start()``.
    :param min_mem_size: converted with ``int()`` and forwarded to
        ``H2OLocalServer.start()``.
    :param strict_version_check: when true, ``version_check()`` is called.
    :param kwargs: deprecated attributes; ``proxies``, ``max_mem_size_GB`` and
        ``min_mem_size_GB`` are consulted as fallbacks.
    :returns: nothing
    """
    global h2oconn

    scheme = "https" if https else "http"

    # Explicit `proxy` wins; the deprecated `proxies` kwarg is the fallback.
    if proxy is not None and scheme in proxy:
        proxy = proxy[scheme]
    elif "proxies" in kwargs and scheme in kwargs["proxies"]:
        proxy = kwargs["proxies"][scheme]
    else:
        proxy = None

    # Deprecated *_GB kwargs are shifted left by 30 bits (multiplied by 2**30).
    if max_mem_size is not None:
        mmax = int(max_mem_size)
    elif "max_mem_size_GB" in kwargs:
        mmax = kwargs["max_mem_size_GB"] << 30
    else:
        mmax = None

    if min_mem_size is not None:
        mmin = int(min_mem_size)
    elif "min_mem_size_GB" in kwargs:
        mmin = kwargs["min_mem_size_GB"] << 30
    else:
        mmin = None

    auth = None
    if username and password:
        auth = (username, password)

    if not start_h2o:
        print("Warning: if you don't want to start local H2O server, then use of `h2o.connect()` is preferred.")
    if ip and ip != "localhost" and ip != "127.0.0.1" and start_h2o:
        print("Warning: connecting to remote server but falling back to local... Did you mean to use `h2o.connect()`?")

    verify_ssl = not insecure
    probe_msgs = ("Checking whether there is an H2O instance running at {url}",
                  "connected.", "not found.")
    try:
        h2oconn = H2OConnection.open(url=url, ip=ip, port=port, https=https,
                                     verify_ssl_certificates=verify_ssl, auth=auth,
                                     proxy=proxy, cluster_name=cluster_name,
                                     verbose=True, _msgs=probe_msgs)
    except H2OConnectionError:
        # Backward compatibility: in init() port parameter really meant "baseport" when starting a local server...
        if port and not str(port).endswith("+"):
            port = str(port) + "+"
        if not start_h2o:
            raise
        hs = H2OLocalServer.start(nthreads=nthreads, enable_assertions=enable_assertions,
                                  max_mem_size=mmax, min_mem_size=mmin,
                                  ice_root=ice_root, port=port)
        h2oconn = H2OConnection.open(server=hs, https=https,
                                     verify_ssl_certificates=verify_ssl, auth=auth,
                                     proxy=proxy, cluster_name=cluster_name,
                                     verbose=True)

    if strict_version_check:
        version_check()
    def display_tree(self, title, shell=False):
        """
        Train a single-tree H2O model on the test data and display it as a PNG.

        Steps: build an H2OFrame from ``self.X_test``, ``self.y_test`` and
        column 1 of ``self.model.predict_proba(self.X_test)``; train a
        one-tree ``H2ORandomForestEstimator`` on it; download its MOJO; run
        ``hex.genmodel.tools.PrintMojo`` via ``java`` to write
        ``<self.model_id>.gv``; run ``dot`` to write ``<self.model_id>.png``;
        display the PNG with ``display(Image(...))``.

        :param title: title passed to PrintMojo via ``--title``; omitted when
            None.
        :param shell: when true, the method does nothing and returns None.
        :return: None
        """
        if shell:
            return

        # Column 1 of predict_proba — presumably the positive-class
        # probability; confirm against the wrapped model.
        predict_probs = pd.DataFrame(
            self.model.predict_proba(self.X_test)[:, 1],
            columns=['prob_pred'])
        # assumes self.feature_names is a pandas Index (append returns a new
        # Index with 'label' added) — TODO confirm
        test = pd.DataFrame(
            np.c_[self.X_test, self.y_test],
            columns=self.feature_names.append(pd.Index(['label'])))
        test_yhat = h2o.H2OFrame(pd.concat([test, predict_probs], axis=1))

        # initialize single tree surrogate model
        surrogate = H2ORandomForestEstimator(
            ntrees=1,  # use only one tree
            sample_rate=1,  # use all rows in that tree
            mtries=-2,  # use all columns in that tree
            max_depth=self.max_depth,  # shallow trees are easier to understand
            seed=12345,  # random seed for reproducibility
            model_id=self.model_id)  # gives MOJO artifact a recognizable name

        # train single tree surrogate model
        # NOTE(review): the target is 'label'; the 'prob_pred' column is in
        # the frame but is used neither as a feature nor as the target here.
        surrogate.train(x=list(self.feature_names),
                        y='label',
                        training_frame=test_yhat)

        # persist MOJO (compiled representation of trained model)
        # from which to generate plot of surrogate
        mojo_path = surrogate.download_mojo(path='.')

        # locate h2o jar
        hs = H2OLocalServer()
        h2o_jar_path = hs._find_jar()

        # construct command line call to generate graphviz version of
        # surrogate tree see for more information:
        # http://docs.h2o.ai/h2o/latest-stable/h2o-genmodel/javadoc/index.html
        #
        # The argument vector is built as a list (not by splitting a
        # concatenated string) so that paths containing whitespace are
        # passed as single arguments.
        model_id = self.model_id
        gv_file_name = model_id + '.gv'
        gv_args = [
            'java', '-cp', h2o_jar_path,
            'hex.genmodel.tools.PrintMojo',
            '--tree', '0',
            '-i', mojo_path,
            '-o', gv_file_name,
        ]
        if title is not None:
            gv_args += ['--title', title]
        _ = subprocess.call(gv_args)

        # construct call to generate PNG from
        # graphviz representation of the tree
        png_file_name = model_id + '.png'
        png_args = ['dot', '-Tpng', gv_file_name, '-o', png_file_name]
        _ = subprocess.call(png_args)

        # display in-notebook
        display(Image(png_file_name))
sys.path.insert(0, "../..")
import h2o
from h2o.backend import H2OLocalServer
from h2o.exceptions import H2OConnectionError

PORT = 55330

# If something is already listening on PORT, connect to it and shut it down;
# a failed connection just means there is nothing to stop.
try:
    conn = h2o.connect(ip="localhost", port=PORT)
    conn.shutdown_server(prompt=False)
except H2OConnectionError:
    pass

# Launch a fresh local server (the trailing "+" is appended to the port
# string passed to start()) and connect to it.
server = H2OLocalServer.start(port="{}+".format(PORT))
conn = h2o.connect(server=server)

# True when the cluster reports itself as up.
cluster_up = conn.cluster_is_up()

# True only when every node entry reports "healthy".
cluster_healthy = all(node["healthy"] for node in conn.info().nodes)

# Fail fast on either problem; otherwise report success.
if not cluster_up:
    raise ValueError("Cluster is not up and is not healthy")
if not cluster_healthy:
    raise ValueError("Cluster is up but not healthy")
print("Cluster health is up and healthy")
Esempio n. 7
0
    conn.cluster.shutdown(prompt=False)
except H2OConnectionError:
    pass

# Each init below uses a different port because a server that was just shut
# down takes some time to go away.
print("Start a server with max_mem_size = 1Gb")
h2o.init(max_mem_size="1g", port=10101, strict_version_check=False)
h2o.cluster().shutdown()

print("Starting a server with min_mem_size = 314Mb")
h2o.init(min_mem_size="314M", port=20202, strict_version_check=False)
h2o.cluster().shutdown()

print("Starting a server explicitly")
# Launch a fresh local server (the trailing "+" is appended to the port
# string passed to start()) and connect to it.
server = H2OLocalServer.start(port="{}+".format(PORT))
conn = h2o.connect(server=server)

# True when the cluster reports itself as running.
cluster_up = conn.cluster.is_running()

# True only when every node entry reports "healthy".
cluster_healthy = all(node["healthy"] for node in conn.cluster.nodes)

# Fail fast on either problem; otherwise report success.
if not cluster_up:
    raise ValueError("Cluster is not up and is not healthy")
if not cluster_healthy:
    raise ValueError("Cluster is up but not healthy")
print("Cluster is up and healthy")