def import_frame(path=None, vecs=None):
    """
    Build an H2OFrame either from existing vecs or from a path.

    :param path: Handed to H2OFrame as ``remote_fname`` when ``vecs`` is falsy.
    :param vecs: Handed to H2OFrame as ``vecs`` when truthy; takes precedence over ``path``.
    :return: A new H2OFrame
    """
    if vecs:
        return H2OFrame(vecs=vecs)
    return H2OFrame(remote_fname=path)
def import_frame(path=None, vecs=None):
    """
    Import a frame from a file (remote or local machine). If you run H2O on Hadoop, you can
    access HDFS.

    :param path: A path specifying the location of the data to import. Only used when
        ``vecs`` is falsy.
    :param vecs: Optional vecs to wrap directly; when truthy, ``path`` is ignored.
    :return: A new H2OFrame
    """
    if not vecs:
        return H2OFrame(remote_fname=path)
    return H2OFrame(vecs=vecs)
def list_timezones():
    """
    Get a list of all the timezones.

    :return: the time zones (as an H2OFrame)
    """
    # Wrap the "listTimeZones" expression in a frame, then evaluate it via _frame().
    timezones_expr = ExprNode("listTimeZones")
    lazy_frame = H2OFrame(expr=timezones_expr)
    return lazy_frame._frame()
def get_timezone():
    """
    Get the Time Zone on the H2O Cloud.

    :return: the time zone (string)
    """
    # Wrap the "getTimeZone" expression in a frame and reduce it with _scalar().
    timezone_expr = ExprNode("getTimeZone")
    lazy_frame = H2OFrame(expr=timezone_expr)
    return lazy_frame._scalar()
def ls():
    """
    List Keys on an H2O Cluster.

    :return: Returns a list of keys in the current H2O instance (the result of
        ``as_data_frame()`` on the evaluated "ls" expression).
    """
    keys_frame = H2OFrame(expr=ExprNode("ls"))._frame()
    return keys_frame.as_data_frame()
def get_frame(frame_id):
    """
    Look up an existing frame by id and wrap its vecs in an H2OFrame.

    :param frame_id: Id of the frame; it is URL-quoted and appended to the "Frames/" endpoint.
    :return: An H2OFrame built from the column labels, vec ids and row count of the first
        entry under "frames" in the response.
    :raises ValueError: if ``frame_id`` is None.
    """
    if frame_id is None:
        raise ValueError("frame_id must not be None")

    response = H2OConnection.get_json("Frames/" + urllib.quote(frame_id))
    frame_json = response["frames"][0]  # only the first returned frame is used
    labels = [column["label"] for column in frame_json["columns"]]
    named_keys = zip(labels, frame_json["vec_ids"])
    return H2OFrame(vecs=H2OVec.new_vecs(named_keys, frame_json["rows"]))
def ifelse(test, yes, no):
    """
    Semantically equivalent to R's ifelse. Based on the booleans in the test vector, the
    output has the values of the yes and no vectors interleaved (or merged together).

    :param test: A "test" H2OFrame
    :param yes: A "yes" H2OFrame
    :param no: A "no" H2OFrame
    :return: An H2OFrame
    """
    # Build the "ifelse" expression node, wrap it in a frame, and evaluate it via _frame().
    ifelse_expr = ExprNode("ifelse", test, yes, no)
    lazy_frame = H2OFrame(expr=ifelse_expr)
    return lazy_frame._frame()
def ifelse(test, yes, no):
    """
    Semantically equivalent to R's ifelse. Based on the booleans in the test vector, the
    output has the values of the yes and no vectors interleaved (or merged together).

    :param test: A "test" H2OFrame or H2OVec, or a plain Python bool
    :param yes: A "yes" H2OFrame or H2OVec, a number, or None (encoded as NaN)
    :param no: A "no" H2OFrame or H2OVec, a number, or None (encoded as NaN)
    :return: An H2OFrame
    """
    def _key_operand(operand):
        # Resolve an H2O object to its key; return (quoted argument, key to clean up).
        if isinstance(operand, H2OVec):
            key = operand._expr.eager()
        else:
            key = operand.key()
        return "'" + key + "'", key

    def _value_operand(operand):
        # Numbers and None become inline literals with no key to clean up afterwards.
        if isinstance(operand, (int, float)):
            return "#{}".format(str(operand)), None
        if operand is None:
            return "#NaN", None
        return _key_operand(operand)

    # Operands are resolved in the order test, yes, no.
    if isinstance(test, bool):
        test_arg, test_key = ("%TRUE" if test else "%FALSE"), None
    else:
        test_arg, test_key = _key_operand(test)
    yes_arg, yes_key = _value_operand(yes)
    no_arg, no_key = _value_operand(no)

    result_key = H2OFrame.py_tmp_key()
    rapids("(= !{} (ifelse {} {} {}))".format(result_key, test_arg, yes_arg, no_arg))

    frame_json = frame(result_key)['frames'][0]  # fetch the frame as JSON; only one frame
    row_count = frame_json['rows']
    vec_keys = frame_json['vec_ids']
    labels = [column['label'] for column in frame_json['columns']]
    # Peel the vecs out of the returned frame before its key is handed to removeFrameShallow.
    vecs = H2OVec.new_vecs(zip(labels, vec_keys), row_count)

    removeFrameShallow(result_key)
    for operand_key in (yes_key, no_key, test_key):
        if operand_key is not None:
            removeFrameShallow(str(operand_key))
    return H2OFrame(vecs=vecs)
def upload_file(path, destination_key=""):
    """
    Upload a dataset at the path given from the local machine to the H2O cluster.

    :param path: A path specifying the location of the data to upload; it is made absolute
        before being posted.
    :param destination_key: The name of the H2O Frame in the H2O Cluster. When left as the
        empty string, a temporary key from ``H2OFrame.py_tmp_key()`` is used instead.
    :return: A new H2OFrame
    """
    upload_info = {"file": os.path.abspath(path)}
    if destination_key == "":
        frame_key = H2OFrame.py_tmp_key()
    else:
        frame_key = destination_key
    H2OConnection.post_json(url_suffix="PostFile",
                            file_upload_info=upload_info,
                            destination_key=frame_key)
    return H2OFrame(text_key=frame_key)
def _check_frame(x, y, response):
    """
    Validate training data and, when a response vec is given, attach it to the frame.

    :param x: An H2OFrame, or a list that is wrapped as ``H2OFrame(vecs=x)``.
    :param y: An H2OVec to attach, or a falsy value to skip the response handling.
    :param response: Object whose ``_name`` is used as the column key under which ``y`` is stored.
    :return: The H2OFrame (the original ``x`` or the newly wrapped one).
    :raises ValueError: if ``x`` or ``y`` has the wrong type, or a vec in ``x`` already has
        the same name as ``y``.
    """
    if isinstance(x, H2OFrame):
        training = x
    elif isinstance(x, list):
        training = H2OFrame(vecs=x)
    else:
        raise ValueError("`x` must be an H2OFrame or a list of H2OVecs. Got: " + str(type(x)))

    if not y:
        return training

    if not isinstance(y, H2OVec):
        raise ValueError("`y` must be an H2OVec. Got: " + str(type(y)))
    if any(y._name == vec._name for vec in training._vecs):
        raise ValueError("Found response "+y._name+" in training `x` data")
    # NOTE(review): the column is keyed by response._name rather than y._name -- confirm
    # that callers always pass a matching pair.
    training[response._name] = y
    return training
def _simple_un_math_op(op, data):
    """
    Element-wise math operations on H2OFrame and H2OVec.

    :param op: the math operation
    :param data: the H2OFrame or H2OVec object to operate on.
    :return: H2OFrame or H2OVec, with lazy operation
    :raises ValueError: if ``data`` is neither an H2OFrame nor an H2OVec.
    """
    if isinstance(data, H2OFrame):
        # Apply the operation to every vec of the frame and rewrap the results.
        return H2OFrame(vecs=[_simple_un_math_op(op, vec) for vec in data._vecs])
    if isinstance(data, H2OVec):
        return H2OVec(data._name, Expr(op, left=data, length=len(data)))
    # Call form of raise: same behaviour as `raise ValueError, msg` on Python 2, and also
    # valid syntax on Python 3.
    raise ValueError(op + " only operates on H2OFrame or H2OVec objects")
def as_list(data, use_pandas=True):
    """
    Convert an H2O data object into a python-specific object.

    WARNING: This will pull all data local!

    If Pandas is available (and use_pandas is True), then pandas will be used to parse the
    data frame. Otherwise, a list-of-lists populated by character data will be returned (so
    the types of data will all be str).

    :param data: An H2O data object (H2OFrame, Expr or H2OVec). Any other type falls through
        and yields None.
    :param use_pandas: Try to use pandas for reading in the data.
    :return: List of list (Rows x Columns).
    """
    # Probe for pandas without importing it.
    try:
        imp.find_module('pandas')
    except ImportError:
        found_pandas = False
    else:
        found_pandas = True
    want_pandas = use_pandas and found_pandas

    # Frame: send it, download it into a list-of-lists or pandas frame, then clean up.
    if isinstance(data, H2OFrame):
        sent = H2OFrame.send_frame(data)
        converted = _as_data_frame(sent, want_pandas)
        removeFrameShallow(sent)
        return converted

    if isinstance(data, Expr):
        if data.is_local():
            return data._data
        if data.is_pending():
            data.eager()
            if data.is_local():
                if isinstance(data._data, list):
                    return [data._data]
                return [[data._data]]
        return _as_data_frame(data._data, want_pandas)

    if isinstance(data, H2OVec):
        vec_expr = data._expr
        if vec_expr.is_local():
            return vec_expr._data
        if vec_expr.is_pending():
            vec_expr.eager()
            if vec_expr.is_local():
                return [[vec_expr._data]]
        # Otherwise wrap the vec in a one-column frame and take the frame path.
        return as_list(H2OFrame(vecs=[data]), use_pandas)
def parse_raw(setup, id=None, first_line_is_header=(-1,0,1)):
    """
    Used in conjunction with import_file and parse_setup in order to make alterations before
    parsing.

    :param setup: Result of h2o.parse_setup
    :param id: An optional id for the frame. When omitted, ``setup["destination_frame"]`` is used.
    :param first_line_is_header: -1,0,1 if the first line is to be used as the header
    :return: An H2OFrame object
    """
    # Only fall back to the id from the parse setup when the caller did not supply one;
    # previously a supplied id was unconditionally overwritten.
    if id is None:
        id = setup["destination_frame"]
    fr = H2OFrame()
    parsed = parse(setup, id, first_line_is_header)
    # Populate the frame's metadata directly from the parse result.
    fr._nrows = parsed['rows']
    fr._col_names = parsed['column_names']
    fr._ncols = len(fr._col_names)
    fr._computed = True
    fr._id = id
    return fr
def parse_raw(setup, id=None, first_line_is_header=(-1, 0, 1)):
    """
    Used in conjunction with import_file and parse_setup in order to make alterations before
    parsing.

    :param setup: Result of h2o.parse_setup
    :param id: An optional id for the frame; a temporary key is generated when it is None.
    :param first_line_is_header: -1,0,1 if the first line is to be used as the header
    :return: An H2OFrame object
    """
    frame_id = H2OFrame.py_tmp_key() if id is None else id
    parsed = parse(setup, frame_id, first_line_is_header)
    vec_keys = parsed['vec_ids']
    row_count = parsed['rows']
    names = parsed['column_names']
    if not names:
        # No column names in the parse result: fall back to C1, C2, ... (one per vec).
        names = ["C" + str(index) for index in range(1, len(vec_keys) + 1)]
    return H2OFrame(vecs=H2OVec.new_vecs(zip(names, vec_keys), row_count))
def ls():
    """
    List Keys on an H2O Cluster.

    Also prints a "First 10 Keys: " banner followed by the output of the frame's ``show()``.

    :return: Returns a list of keys in the current H2O instance
    """
    tmp_key = H2OFrame.py_tmp_key()
    rapids("(= !{} (ls ))".format(tmp_key))

    frame_json = frame(tmp_key)['frames'][0]  # only the first returned frame is used
    row_count = frame_json['rows']
    vec_keys = frame_json['vec_ids']
    labels = [column['label'] for column in frame_json['columns']]

    keys_frame = H2OFrame(vecs=H2OVec.new_vecs(zip(labels, vec_keys), row_count))
    keys_frame.setNames(["keys"])
    # Single-argument parenthesised form: prints the same text under the Python 2 print
    # statement and is also valid as a Python 3 function call.
    print("First 10 Keys: ")
    keys_frame.show()
    return as_list(keys_frame, use_pandas=False)
def cbind(left, right):
    """
    Column-bind two H2O objects into a new H2OFrame.

    :param left: H2OFrame or H2OVec
    :param right: H2OFrame or H2OVec
    :return: new H2OFrame with left|right cbinded; its columns are renamed to the names of
        the input vecs.
    :raises ValueError: if either argument is neither an H2OFrame nor an H2OVec.
    """
    def _vecs_of(obj):
        # Normalise either accepted input type to a fresh list of vecs.
        if isinstance(obj, H2OFrame):
            return list(obj._vecs)
        if isinstance(obj, H2OVec):
            return [obj]
        raise ValueError("left and right data must be H2OVec or H2OFrame")

    vecs = _vecs_of(left) + _vecs_of(right)
    names = [vec.name() for vec in vecs]

    tmp_key = H2OFrame.py_tmp_key()
    vec_keys = [vec._expr.eager() for vec in vecs]
    rapids("(= !" + tmp_key + " (cbind %FALSE %" + " %".join(vec_keys) + "))")

    frame_json = frame(tmp_key)['frames'][0]  # only the first returned frame is used
    row_count = frame_json['rows']
    vec_ids = frame_json['vec_ids']
    labels = [column['label'] for column in frame_json['columns']]

    result = H2OFrame(vecs=H2OVec.new_vecs(zip(labels, vec_ids), row_count))
    # Restore the input vec names on the new frame's columns.
    result.setNames(names)
    return result
def which(condition):
    """
    Find the indices at which a condition holds.

    :param condition: A conditional statement.
    :return: A H2OFrame of 1 column filled with 0-based indices for which the condition is True
    """
    # Build the "h2o.which" expression node, wrap it in a frame, and evaluate it via _frame().
    which_expr = ExprNode("h2o.which", condition, False)
    lazy_frame = H2OFrame(expr=which_expr)
    return lazy_frame._frame()