def h2o_plot(model, model_file_prefix):
    """
    Plot tree 0 of an h2o model and return a node-count based complexity measure.

    The model's MOJO is downloaded, rendered to a Graphviz file with the
    ``hex.genmodel.tools.PrintMojo`` tool from the h2o jar, and then converted
    to a PNG with the ``dot`` executable. Both ``java`` and ``dot`` are invoked
    as external processes.

    From: https://resources.oreilly.com/oriole/interpretable-machine-learning-with-python-xgboost-and-h2o/blob/master/dt_surrogate_loco.ipynb

    :param model: object exposing ``download_mojo()``, which returns the MOJO path.
    :param model_file_prefix: path prefix for the outputs; ``'.gv'`` and ``'.png'``
        are appended to it.
    :return: number of occurrences of ``"shape=box"`` in the generated ``.gv`` file.
    """
    mojo_path = model.download_mojo()
    h2o_jar_path = H2OLocalServer()._find_jar()

    # Build the argument vectors element by element instead of splitting a
    # concatenated string, so paths that contain whitespace stay intact.
    gv_file_name = model_file_prefix + '.gv'
    gv_args = [
        'java', '-cp', h2o_jar_path,
        'hex.genmodel.tools.PrintMojo',
        '--tree', '0',
        '--decimalplaces', '2',
        '-i', mojo_path,
        '-o', gv_file_name,
    ]
    subprocess.call(gv_args)

    # Compute complexity of dt based on the resulting graphviz.
    # The context manager closes the file handle deterministically.
    with open(gv_file_name, 'r') as gv_file:
        complexity = gv_file.read().count("shape=box")

    png_file_name = model_file_prefix + '.png'
    png_args = ['dot', '-Tpng', gv_file_name, '-o', png_file_name]
    subprocess.call(png_args)

    return complexity
def init(url=None, ip=None, port=None, https=None, insecure=False, username=None, password=None, cluster_name=None,
         proxy=None, start_h2o=True, nthreads=-1, ice_root=None, enable_assertions=True, max_mem_size=None,
         min_mem_size=None, strict_version_check=True, **kwargs):
    """
    Try to connect to an existing H2O server; if that fails, start a local server and connect to it.

    :param url: forwarded unchanged to ``connect()``.
    :param ip: forwarded to ``connect()``; a value other than "localhost" / "127.0.0.1" prints a warning
        when ``start_h2o`` is truthy.
    :param port: forwarded to ``connect()``; when a local server is started, a trailing "+" is appended
        (if missing) before it is passed to ``H2OLocalServer.start()``.
    :param https: selects the "https" scheme when truthy, "http" otherwise.
    :param insecure: its negation is passed as ``verify_ssl_certificates``.
    :param username: used together with ``password`` to build the ``auth`` pair (only when both are truthy).
    :param password: see ``username``.
    :param cluster_name: forwarded to the connection calls.
    :param proxy: mapping indexed by scheme; the entry for the selected scheme is used.
    :param start_h2o: when falsy, a failed connection attempt is re-raised instead of starting a server.
    :param nthreads: forwarded to ``H2OLocalServer.start()``.
    :param ice_root: forwarded to ``H2OLocalServer.start()``.
    :param enable_assertions: forwarded to ``H2OLocalServer.start()``.
    :param max_mem_size: converted with ``int()`` and forwarded to ``H2OLocalServer.start()``.
    :param min_mem_size: converted with ``int()`` and forwarded to ``H2OLocalServer.start()``.
    :param strict_version_check: when truthy, ``version_check()`` is called after connecting.
    :param kwargs: deprecated attributes; ``proxies``, ``max_mem_size_GB`` and ``min_mem_size_GB`` are
        consulted when the corresponding modern argument is not usable.
    :raises H2OConnectionError: when the connection attempt fails and ``start_h2o`` is falsy.
    :returns: nothing
    """
    global h2oconn

    scheme = "https" if https else "http"

    # Pick the proxy for the active scheme, falling back to the deprecated `proxies` keyword.
    if proxy is not None and scheme in proxy:
        proxy = proxy[scheme]
    elif "proxies" in kwargs and scheme in kwargs["proxies"]:
        proxy = kwargs["proxies"][scheme]
    else:
        proxy = None

    # Memory limits: explicit value first, then the deprecated *_GB keyword shifted left by 30 bits.
    if max_mem_size is not None:
        mmax = int(max_mem_size)
    elif "max_mem_size_GB" in kwargs:
        mmax = kwargs["max_mem_size_GB"] << 30
    else:
        mmax = None

    if min_mem_size is not None:
        mmin = int(min_mem_size)
    elif "min_mem_size_GB" in kwargs:
        mmin = kwargs["min_mem_size_GB"] << 30
    else:
        mmin = None

    auth = None
    if username and password:
        auth = (username, password)

    # The two warnings are mutually exclusive: one needs start_h2o falsy, the other truthy.
    if not start_h2o:
        print("Warning: if you don't want to start local H2O server, then use of `h2o.connect()` is preferred.")
    elif ip and ip not in ("localhost", "127.0.0.1"):
        print("Warning: connecting to remote server but falling back to local... Did you mean to use `h2o.connect()`?")

    # Options shared by the initial connection attempt and the fallback connection.
    shared_opts = dict(https=https, verify_ssl_certificates=not insecure, auth=auth, proxy=proxy,
                       cluster_name=cluster_name, verbose=True)
    try:
        connect(url=url, ip=ip, port=port, **shared_opts)
    except H2OConnectionError:
        # Backward compatibility: in init() port parameter really meant "baseport" when starting a local server...
        if port and not str(port).endswith("+"):
            port = str(port) + "+"
        if not start_h2o:
            raise
        local_server = H2OLocalServer.start(nthreads=nthreads, enable_assertions=enable_assertions,
                                            max_mem_size=mmax, min_mem_size=mmin, ice_root=ice_root, port=port)
        h2oconn = H2OConnection.open(server=local_server, **shared_opts)

    if strict_version_check:
        version_check()
    if h2oconn.info().build_too_old:
        print("Warning: Your H2O cluster version is too old ({})! Please download and install the latest version from http://h2o.ai/download/".format(h2oconn.info().build_age))
def get_gv(title, model_id, mojo_path):
    """ Utility function to generate graphviz dot file from h2o MOJO using
    a subprocess.

    The ``hex.genmodel.tools.PrintMojo`` tool from the h2o jar is run through
    ``java`` for tree 0, and the result is written to ``model_id + '.gv'``.
    The jar path and the full command are printed before the call.

    Args:
        title: Title for displayed decision tree; when None, no ``--title``
            option is passed to PrintMojo.
        model_id: h2o model identifier; used as the stem of the output file.
        mojo_path: Path to saved model MOJO (Java scoring artifact).

    Returns:
        None. The exit status of the external process is discarded.
    """

    # locate h2o jar
    h2o_jar_path = H2OLocalServer()._find_jar()
    print('Discovered H2O jar path:\n', h2o_jar_path)

    # construct command line call to generate graphviz version of
    # tree, see for more information:
    # http://docs.h2o.ai/h2o/latest-stable/h2o-genmodel/javadoc/index.html
    #
    # The argument vector is assembled element by element rather than by
    # splitting a concatenated string, so a jar or MOJO path containing
    # whitespace is passed to java as a single argument.
    gv_file_name = model_id + '.gv'
    gv_args = [
        'java', '-cp', h2o_jar_path,
        'hex.genmodel.tools.PrintMojo',
        '--tree', '0',
        '-i', mojo_path,
        '-o', gv_file_name,
    ]
    if title is not None:
        gv_args = gv_args + ['--title', title]

    # call constructed command
    print()
    print('Calling external process ...')
    print(' '.join(gv_args))
    # The original authors note that passing shell=True may help on
    # platforms where this call fails.
    _ = subprocess.call(gv_args)
def init(url=None, ip=None, port=None, https=None, insecure=False, username=None, password=None, cluster_name=None,
         proxy=None, start_h2o=True, nthreads=-1, ice_root=None, enable_assertions=True, max_mem_size=None,
         min_mem_size=None, strict_version_check=True, **kwargs):
    """
    Connect to a running H2O instance, or start a local server and connect to it when none is found.

    The resulting connection is stored in the module-level ``h2oconn``.

    :param url: forwarded unchanged to ``H2OConnection.open()``.
    :param ip: forwarded to ``H2OConnection.open()``; a value other than "localhost" / "127.0.0.1" prints a
        warning when ``start_h2o`` is truthy.
    :param port: forwarded to ``H2OConnection.open()``; when a local server is started, a trailing "+" is
        appended (if missing) before it is passed to ``H2OLocalServer.start()``.
    :param https: selects the "https" scheme when truthy, "http" otherwise.
    :param insecure: its negation is passed as ``verify_ssl_certificates``.
    :param username: combined with ``password`` into the ``auth`` pair (only when both are truthy).
    :param password: see ``username``.
    :param cluster_name: forwarded to ``H2OConnection.open()``.
    :param proxy: mapping indexed by scheme; the entry for the selected scheme is used.
    :param start_h2o: when falsy, a failed connection attempt is re-raised instead of starting a server.
    :param nthreads: forwarded to ``H2OLocalServer.start()``.
    :param ice_root: forwarded to ``H2OLocalServer.start()``.
    :param enable_assertions: forwarded to ``H2OLocalServer.start()``.
    :param max_mem_size: converted with ``int()`` and forwarded to ``H2OLocalServer.start()``.
    :param min_mem_size: converted with ``int()`` and forwarded to ``H2OLocalServer.start()``.
    :param strict_version_check: when truthy, ``version_check()`` is called after connecting.
    :param kwargs: deprecated attributes; ``proxies``, ``max_mem_size_GB`` and ``min_mem_size_GB`` are
        consulted when the corresponding modern argument is not usable.
    :raises H2OConnectionError: when the connection attempt fails and ``start_h2o`` is falsy.
    :returns: nothing
    """
    global h2oconn

    def _mem_setting(explicit, legacy_key):
        # Explicit value wins; otherwise the deprecated *_GB keyword shifted left by 30 bits; otherwise None.
        if explicit is not None:
            return int(explicit)
        if legacy_key in kwargs:
            return kwargs[legacy_key] << 30
        return None

    scheme = "https" if https else "http"

    if proxy is not None and scheme in proxy:
        proxy = proxy[scheme]
    else:
        has_legacy_proxy = "proxies" in kwargs and scheme in kwargs["proxies"]
        proxy = kwargs["proxies"][scheme] if has_legacy_proxy else None

    mmax = _mem_setting(max_mem_size, "max_mem_size_GB")
    mmin = _mem_setting(min_mem_size, "min_mem_size_GB")

    auth = (username, password) if (username and password) else None

    if start_h2o:
        if ip and ip not in ("localhost", "127.0.0.1"):
            print(
                "Warning: connecting to remote server but falling back to local... Did you mean to use `h2o.connect()`?"
            )
    else:
        print(
            "Warning: if you don't want to start local H2O server, then use of `h2o.connect()` is preferred."
        )

    # Keyword arguments common to the probing connection and the fallback connection.
    common = dict(https=https, verify_ssl_certificates=not insecure, auth=auth, proxy=proxy,
                  cluster_name=cluster_name, verbose=True)
    probe_msgs = ("Checking whether there is an H2O instance running at {url}", "connected.", "not found.")

    try:
        h2oconn = H2OConnection.open(url=url, ip=ip, port=port, _msgs=probe_msgs, **common)
    except H2OConnectionError:
        # Backward compatibility: in init() port parameter really meant "baseport" when starting a local server...
        if port and not str(port).endswith("+"):
            port = str(port) + "+"
        if not start_h2o:
            raise
        started = H2OLocalServer.start(nthreads=nthreads, enable_assertions=enable_assertions, max_mem_size=mmax,
                                       min_mem_size=mmin, ice_root=ice_root, port=port)
        h2oconn = H2OConnection.open(server=started, **common)

    if strict_version_check:
        version_check()
def display_tree(self, title, shell=False):
    """Train a single-tree H2O surrogate, render it with Graphviz and display the PNG.

    Steps performed by the code below:
      1. Build a frame from ``self.X_test`` / ``self.y_test`` (column ``label``)
         plus column 1 of ``self.model.predict_proba(self.X_test)`` (``prob_pred``).
      2. Train an ``H2ORandomForestEstimator`` with one tree on ``self.feature_names``
         against the ``label`` column.
      3. Download the surrogate's MOJO into the current directory.
      4. Run ``hex.genmodel.tools.PrintMojo`` via ``java`` to write ``<model_id>.gv``.
      5. Run ``dot`` to write ``<model_id>.png`` and display that image.

    Args:
        title: Passed to PrintMojo as ``--title`` when not None.
        shell: When truthy the body guarded by ``if not shell`` is skipped.

    NOTE(review): the original indentation was lost; the whole body is assumed
    to sit under ``if not shell`` because later statements depend on names bound
    at the top of it. Confirm against the original file.
    """
    if not shell:
        # Column 1 of predict_proba (presumably the positive-class probability; verify).
        predict_probs = pd.DataFrame(self.model.predict_proba(
            self.X_test)[:, 1], columns=['prob_pred'])
        # Features and labels side by side, with 'label' appended to the feature names.
        test = pd.DataFrame(np.c_[self.X_test, self.y_test],
                            columns=self.feature_names.append(
                                pd.Index(['label'])))
        test_yhat = h2o.H2OFrame(pd.concat([test, predict_probs], axis=1))

        # initialize single tree surrogate model
        surrogate = H2ORandomForestEstimator(
            ntrees=1,  # use only one tree
            sample_rate=1,  # use all rows in that tree
            mtries=-2,  # use all columns in that tree
            max_depth=self.max_depth,  # shallow trees are easier to understand
            seed=12345,  # random seed for reproducibility
            model_id=self.model_id
        )  # gives MOJO artifact a recognizable name

        # train single tree surrogate model
        surrogate.train(x=list(self.feature_names),
                        y='label',
                        training_frame=test_yhat)

        # persist MOJO (compiled, representation of trained model)
        # from which to generate plot of surrogate
        mojo_path = surrogate.download_mojo(path='.')

        # locate h2o jar
        hs = H2OLocalServer()
        h2o_jar_path = hs._find_jar()

        # construct command line call to generate graphviz version of
        # surrogate tree see for more information:
        # http://docs.h2o.ai/h2o/latest-stable/h2o-genmodel/javadoc/index.html
        # NOTE(review): the command is built as one string and split on
        # whitespace, so a jar or MOJO path containing spaces would be broken
        # into several arguments.
        model_id = self.model_id
        gv_file_name = model_id + '.gv'
        gv_args = str('-cp ' + h2o_jar_path +
                      ' hex.genmodel.tools.PrintMojo --tree 0 -i ' +
                      mojo_path + ' -o').split()
        gv_args.insert(0, 'java')
        gv_args.append(gv_file_name)
        if title is not None:
            gv_args = gv_args + ['--title', title]

        # if the line below is failing for you, try instead:
        # _ = subprocess.call(gv_args, shell=True)
        _ = subprocess.call(gv_args)

        # construct call to generate PNG from
        # graphviz representation of the tree
        png_file_name = model_id + '.png'
        png_args = str('dot -Tpng ' + gv_file_name + ' -o ' + png_file_name)
        png_args = png_args.split()

        # if the line below is failing for you, try instead:
        # _ = subprocess.call(png_args, shell=True)
        _ = subprocess.call(png_args)

        # display in-notebook
        display(Image((png_file_name)))
# Make the in-repo h2o package importable before importing it.
sys.path.insert(0, "../..")
import h2o
from h2o.backend import H2OLocalServer
from h2o.exceptions import H2OConnectionError

PORT = 55330

# If an instance is already listening on PORT, shut it down; a failed
# connection simply means there is nothing to shut down.
try:
    conn = h2o.connect(ip="localhost", port=PORT)
    conn.shutdown_server(prompt=False)
except H2OConnectionError:
    pass

# Launch a fresh local server (the port is passed with a trailing "+") and connect to it.
server = H2OLocalServer.start(port="{}+".format(PORT))
conn = h2o.connect(server=server)

# Is the cluster up (True) or not (False)?
cluster_up = conn.cluster_is_up()

# The cluster counts as healthy only if every node reports healthy.
cluster_healthy = all(node["healthy"] for node in conn.info().nodes)

# Fail fast on the bad states; report success only when both checks pass.
if not cluster_up:
    raise ValueError("Cluster is not up and is not healthy")
if not cluster_healthy:
    raise ValueError("Cluster is up but not healthy")
print("Cluster health is up and healthy")
conn.cluster.shutdown(prompt=False) except H2OConnectionError: pass # The server takes some time to shut down, so try different ports print("Start a server with max_mem_size = 1Gb") h2o.init(max_mem_size="1g", port=10101, strict_version_check=False) h2o.cluster().shutdown() print("Starting a server with min_mem_size = 314Mb") h2o.init(min_mem_size="314M", port=20202, strict_version_check=False) h2o.cluster().shutdown() print("Starting a server explicitly") # Now start a new H2O server and connect to it. server = H2OLocalServer.start(port=str(PORT) + "+") conn = h2o.connect(server=server) # Get if cluster is up (True) or not (False) cluster_up = conn.cluster.is_running() # Check if cluster is healthy cluster_healthy = all(node["healthy"] for node in conn.cluster.nodes) # Logical test to see if status is healthy or not if cluster_healthy and cluster_up: print("Cluster is up and healthy") elif not cluster_healthy and cluster_up: raise ValueError("Cluster is up but not healthy") else: raise ValueError("Cluster is not up and is not healthy")