def get_frame(frame_id):
    """
    Look up a frame by id and wrap it in an H2OFrame.

    Requests ``Frames/<frame_id>`` through H2OConnection.get_json (the id is
    passed through ``urllib.quote`` first) and uses the first entry of the
    returned ``frames`` list.

    :param frame_id: Id of the frame to fetch; must not be None
    :return: An H2OFrame built from the frame's column labels and vec ids
    :raises ValueError: if frame_id is None
    """
    if frame_id is None:
        raise ValueError("frame_id must not be None")

    endpoint = "Frames/" + urllib.quote(frame_id)
    frame_json = H2OConnection.get_json(endpoint)["frames"][0]  # first (only expected) frame

    labels = [column["label"] for column in frame_json["columns"]]
    vec_keys = frame_json["vec_ids"]
    return H2OFrame(vecs=H2OVec.new_vecs(zip(labels, vec_keys), frame_json["rows"]))
def get_frame(frame_id):
    """
    Fetch the description of an existing frame and return it as an H2OFrame.

    The frame id is URL-quoted with ``urllib.quote`` and appended to the
    ``Frames/`` endpoint; only the first element of the ``frames`` list in the
    JSON response is used.

    :param frame_id: Id of the frame to fetch; must not be None
    :return: An H2OFrame whose vecs pair each column label with its vec id
    :raises ValueError: if frame_id is None
    """
    if frame_id is None:
        raise ValueError("frame_id must not be None")

    response = H2OConnection.get_json("Frames/" + urllib.quote(frame_id))
    described = response["frames"][0]

    column_labels = [entry["label"] for entry in described["columns"]]
    label_key_pairs = zip(column_labels, described["vec_ids"])
    new_vecs = H2OVec.new_vecs(label_key_pairs, described["rows"])
    return H2OFrame(vecs=new_vecs)
def _ifelse_frame_key(operand):
    """Return the key for an ifelse operand: eager() for an H2OVec, key() otherwise."""
    if isinstance(operand, H2OVec):
        return operand._expr.eager()
    return operand.key()


def _ifelse_operand(value):
    """
    Translate a yes/no argument of ifelse into its rapids token.

    :param value: an int/float, None, an H2OVec, or an object exposing key()
    :return: (token, key); key is None for numeric and None arguments,
        otherwise the key that was embedded (quoted) in the token
    """
    # NOTE(review): bool is a subclass of int, so True/False take this branch
    # and are rendered as "#True"/"#False" -- confirm that is intended.
    if isinstance(value, (int, float)):
        return "#{}".format(str(value)), None
    if value is None:
        return "#NaN", None
    key = _ifelse_frame_key(value)
    return "'" + key + "'", key


def ifelse(test, yes, no):
    """
    Semantically equivalent to R's ifelse.
    Based on the booleans in the test vector, the output has the values of the
    yes and no vectors interleaved (or merged together).

    :param test: A "test" H2OFrame or H2OVec, or a plain bool
    :param yes: A "yes" H2OFrame or H2OVec, a number, or None (sent as NaN)
    :param no: A "no" H2OFrame or H2OVec, a number, or None (sent as NaN)
    :return: An H2OFrame
    """
    # Operands are resolved in the order test, yes, no.
    if isinstance(test, bool):
        test_token = "%TRUE" if test else "%FALSE"
        test_key = None
    else:
        test_key = _ifelse_frame_key(test)
        test_token = "'" + test_key + "'"
    yes_token, yes_key = _ifelse_operand(yes)
    no_token, no_key = _ifelse_operand(no)

    tmp_key = H2OFrame.py_tmp_key()
    expr = "(= !{} (ifelse {} {} {}))".format(tmp_key, test_token, yes_token, no_token)
    rapids(expr)

    fr = frame(tmp_key)['frames'][0]  # Fetch the frame as JSON; just the first (only) frame
    rows = fr['rows']                 # Row count
    veckeys = fr['vec_ids']           # List of h2o vec keys
    colnames = [col['label'] for col in fr['columns']]
    vecs = H2OVec.new_vecs(zip(colnames, veckeys), rows)  # Peel the Vecs out of the returned Frame

    # Release the result key first, then any operand keys obtained above.
    removeFrameShallow(tmp_key)
    for operand_key in (yes_key, no_key, test_key):
        if operand_key is not None:
            removeFrameShallow(str(operand_key))
    return H2OFrame(vecs=vecs)
def _ifelse_frame_key(operand):
    """Return the key for an ifelse operand: eager() for an H2OVec, key() otherwise."""
    if isinstance(operand, H2OVec):
        return operand._expr.eager()
    return operand.key()


def _ifelse_operand(value):
    """
    Translate a yes/no argument of ifelse into its rapids token.

    :param value: an int/float, None, an H2OVec, or an object exposing key()
    :return: (token, key); key is None for numeric and None arguments,
        otherwise the key that was embedded (quoted) in the token
    """
    # NOTE(review): bool is a subclass of int, so True/False take this branch
    # and are rendered as "#True"/"#False" -- confirm that is intended.
    if isinstance(value, (int, float)):
        return "#{}".format(str(value)), None
    if value is None:
        return "#NaN", None
    key = _ifelse_frame_key(value)
    return "'" + key + "'", key


def ifelse(test, yes, no):
    """
    Semantically equivalent to R's ifelse.
    Based on the booleans in the test vector, the output has the values of the
    yes and no vectors interleaved (or merged together).

    :param test: A "test" H2OFrame or H2OVec, or a plain bool
    :param yes: A "yes" H2OFrame or H2OVec, a number, or None (sent as NaN)
    :param no: A "no" H2OFrame or H2OVec, a number, or None (sent as NaN)
    :return: An H2OFrame
    """
    # Operands are resolved in the order test, yes, no.
    if isinstance(test, bool):
        test_token = "%TRUE" if test else "%FALSE"
        test_key = None
    else:
        test_key = _ifelse_frame_key(test)
        test_token = "'" + test_key + "'"
    yes_token, yes_key = _ifelse_operand(yes)
    no_token, no_key = _ifelse_operand(no)

    tmp_key = H2OFrame.py_tmp_key()
    expr = "(= !{} (ifelse {} {} {}))".format(tmp_key, test_token, yes_token, no_token)
    rapids(expr)

    fr = frame(tmp_key)['frames'][0]  # Fetch the frame as JSON; just the first (only) frame
    rows = fr['rows']                 # Row count
    veckeys = fr['vec_ids']           # List of h2o vec keys
    colnames = [col['label'] for col in fr['columns']]
    vecs = H2OVec.new_vecs(zip(colnames, veckeys), rows)  # Peel the Vecs out of the returned Frame

    # Release the result key first, then any operand keys obtained above.
    removeFrameShallow(tmp_key)
    for operand_key in (yes_key, no_key, test_key):
        if operand_key is not None:
            removeFrameShallow(str(operand_key))
    return H2OFrame(vecs=vecs)
def _simple_un_math_op(op, data):
    """
    Element-wise math operations on H2OFrame and H2OVec

    An H2OFrame is handled by applying the operation to each of its vecs and
    collecting the results in a new H2OFrame; an H2OVec yields a new H2OVec
    (same name) wrapping an Expr for the operation.

    :param op: the math operation
    :param data: the H2OFrame or H2OVec object to operate on.
    :return: H2OFrame or H2OVec, with lazy operation
    :raises ValueError: if data is neither an H2OFrame nor an H2OVec
    """
    if isinstance(data, H2OFrame):
        return H2OFrame(vecs=[_simple_un_math_op(op, vec) for vec in data._vecs])
    if isinstance(data, H2OVec):
        return H2OVec(data._name, Expr(op, left=data, length=len(data)))
    # Call form of raise: valid on Python 2 and 3 (the "raise E, msg" form is 2-only).
    raise ValueError(op + " only operates on H2OFrame or H2OVec objects")
def parse_raw(setup, id=None, first_line_is_header=(-1, 0, 1)):
    """
    Used in conjunction with import_file and parse_setup in order to make
    alterations before parsing.

    :param setup: Result of h2o.parse_setup
    :param id: An optional id for the frame; a temporary key is generated when omitted
    :param first_line_is_header: -1,0,1 if the first line is to be used as the header
    :return: An H2OFrame object
    """
    frame_id = H2OFrame.py_tmp_key() if id is None else id
    parsed = parse(setup, frame_id, first_line_is_header)

    vec_keys = parsed['vec_ids']
    row_count = parsed['rows']
    names = parsed['column_names']
    if not names:
        # No column names came back: fall back to C1, C2, ... (one per vec key).
        names = ["C" + str(index) for index in range(1, len(vec_keys) + 1)]

    return H2OFrame(vecs=H2OVec.new_vecs(zip(names, vec_keys), row_count))
def rep_len(data, length_out):
    """
    Run rep_len on a scalar or delegate to the object's own rep_len.

    For a str or int, a ``(rep_len <data> #<length_out>)`` rapids expression is
    evaluated into a temporary key, the resulting vecs are wrapped in an
    H2OFrame, and the temporary key is released with removeFrameShallow.
    Any other object is expected to provide ``rep_len(length_out=...)``.

    :param data: a str, an int, or an object with a rep_len method
    :param length_out: value passed as the length argument of rep_len
    :return: An H2OFrame for str/int input, otherwise whatever data.rep_len returns
    """
    if not isinstance(data, (str, int)):
        return data.rep_len(length_out=length_out)

    result_key = H2OFrame.py_tmp_key()
    # Ints are sent as #<n>, strings as a double-quoted literal.
    if isinstance(data, int):
        literal = '#{}'.format(data)
    else:
        literal = '\"{}\"'.format(data)
    rapids("(= !{} (rep_len {} {}))".format(result_key, literal, '#{}'.format(length_out)))

    described = frame(result_key)['frames'][0]
    row_count = described['rows']
    vec_keys = described['vec_ids']
    labels = [column['label'] for column in described['columns']]
    new_vecs = H2OVec.new_vecs(zip(labels, vec_keys), row_count)

    removeFrameShallow(result_key)
    return H2OFrame(vecs=new_vecs)
def parse_raw(setup, id=None, first_line_is_header=(-1, 0, 1)):
    """
    Used in conjunction with import_file and parse_setup in order to make
    alterations before parsing.

    :param setup: Result of h2o.parse_setup
    :param id: An optional id for the frame; defaults to a fresh temporary key
    :param first_line_is_header: -1,0,1 if the first line is to be used as the header
    :return: An H2OFrame object
    """
    if id is None:
        target_id = H2OFrame.py_tmp_key()
    else:
        target_id = id
    parse_result = parse(setup, target_id, first_line_is_header)

    keys = parse_result['vec_ids']
    n_rows = parse_result['rows']
    # Use the parsed column names when present, otherwise generate C1..Cn.
    column_names = parse_result['column_names'] or [
        "C" + str(position) for position in range(1, len(keys) + 1)
    ]

    parsed_vecs = H2OVec.new_vecs(zip(column_names, keys), n_rows)
    return H2OFrame(vecs=parsed_vecs)
def ls():
    """
    List Keys on an H2O Cluster

    Evaluates the rapids ``(ls )`` expression into a temporary key, wraps the
    resulting vecs in an H2OFrame, prints a heading followed by the frame's
    ``show()`` output, and returns the frame converted with as_list.

    :return: Returns a list of keys in the current H2O instance
    """
    listing_key = H2OFrame.py_tmp_key()
    rapids("(= !{} (ls ))".format(listing_key))

    described = frame(listing_key)['frames'][0]
    row_count = described['rows']
    vec_keys = described['vec_ids']
    labels = [column['label'] for column in described['columns']]
    keys_frame = H2OFrame(vecs=H2OVec.new_vecs(zip(labels, vec_keys), row_count))

    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print("First 10 Keys: ")
    keys_frame.show()
    return as_list(keys_frame, use_pandas=False)
def ls():
    """
    List Keys on an H2O Cluster

    Evaluates the rapids ``(ls )`` expression into a temporary key, wraps the
    resulting vecs in an H2OFrame whose single column is renamed to "keys",
    prints a heading followed by the frame's ``show()`` output, and returns
    the frame converted with as_list.

    :return: Returns a list of keys in the current H2O instance
    """
    listing_key = H2OFrame.py_tmp_key()
    rapids("(= !{} (ls ))".format(listing_key))

    described = frame(listing_key)['frames'][0]
    row_count = described['rows']
    vec_keys = described['vec_ids']
    labels = [column['label'] for column in described['columns']]
    keys_frame = H2OFrame(vecs=H2OVec.new_vecs(zip(labels, vec_keys), row_count))
    keys_frame.setNames(["keys"])

    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print("First 10 Keys: ")
    keys_frame.show()
    return as_list(keys_frame, use_pandas=False)
def cbind(left, right):
    """
    Column-bind two H2OFrame/H2OVec objects into a new H2OFrame.

    The vecs of ``left`` followed by those of ``right`` are evaluated eagerly,
    bound with a rapids ``cbind`` expression into a temporary key, and the
    result is renamed with the original vec names.

    :param left: H2OFrame or H2OVec
    :param right: H2OFrame or H2OVec
    :return: new H2OFrame with left|right cbinded
    :raises ValueError: if either argument is neither an H2OFrame nor an H2OVec
    """
    # Flatten both sides into one list of vecs, left first.
    vecs = []
    for side in (left, right):
        if isinstance(side, H2OFrame):
            vecs.extend(side._vecs)
        elif isinstance(side, H2OVec):
            vecs.append(side)
        else:
            raise ValueError("left and right data must be H2OVec or H2OFrame")

    names = [vec.name() for vec in vecs]

    tmp_key = H2OFrame.py_tmp_key()
    expr = "(= !" + tmp_key + " (cbind %FALSE %"
    expr += " %".join([vec._expr.eager() for vec in vecs]) + "))"
    rapids(expr)

    described = frame(tmp_key)['frames'][0]
    rows = described['rows']
    vec_ids = described['vec_ids']
    colnames = [col['label'] for col in described['columns']]

    result = H2OFrame(vecs=H2OVec.new_vecs(zip(colnames, vec_ids), rows))
    result.setNames(names)  # restore the input vec names on the bound frame
    return result
def cbind(left, right):
    """
    Column-bind two H2OFrame/H2OVec objects into a new H2OFrame.

    The vecs of ``left`` followed by those of ``right`` are evaluated eagerly,
    bound with a rapids ``cbind`` expression into a temporary key, and the
    result is renamed with the original vec names.

    :param left: H2OFrame or H2OVec
    :param right: H2OFrame or H2OVec
    :return: new H2OFrame with left|right cbinded
    :raises ValueError: if either argument is neither an H2OFrame nor an H2OVec
    """
    # Flatten both sides into one list of vecs, left first.
    vecs = []
    for side in (left, right):
        if isinstance(side, H2OFrame):
            vecs.extend(side._vecs)
        elif isinstance(side, H2OVec):
            vecs.append(side)
        else:
            raise ValueError("left and right data must be H2OVec or H2OFrame")

    names = [vec.name() for vec in vecs]

    tmp_key = H2OFrame.py_tmp_key()
    expr = "(= !" + tmp_key + " (cbind %FALSE %"
    expr += " %".join([vec._expr.eager() for vec in vecs]) + "))"
    rapids(expr)

    described = frame(tmp_key)['frames'][0]
    rows = described['rows']
    vec_ids = described['vec_ids']
    colnames = [col['label'] for col in described['columns']]

    result = H2OFrame(vecs=H2OVec.new_vecs(zip(colnames, vec_ids), rows))
    result.setNames(names)  # restore the input vec names on the bound frame
    return result