Example #1
0
def import_frame(path=None, vecs=None):
  """
  Build an H2OFrame from existing vecs or, failing that, from a path.

  :param path: Handed to H2OFrame as ``remote_fname``; only used when ``vecs`` is falsy.
  :param vecs: Handed to H2OFrame as ``vecs``; takes precedence over ``path`` when truthy.
  :return: The newly constructed H2OFrame.
  """
  if vecs:
    return H2OFrame(vecs=vecs)
  return H2OFrame(remote_fname=path)
Example #2
0
def import_frame(path=None, vecs=None):
    """
    Import a frame from a file, or wrap already-existing vecs in a frame.

    :param path: A path specifying the location of the data to import; only
                 used when ``vecs`` is falsy.
    :param vecs: Optional vecs to wrap directly; when truthy, ``path`` is ignored.
    :return: A new H2OFrame
    """
    # Exactly one keyword is forwarded: vecs win over the path.
    frame_kwargs = {"vecs": vecs} if vecs else {"remote_fname": path}
    return H2OFrame(**frame_kwargs)
Example #3
0
File: h2o.py Project: moidin/h2o-3
def list_timezones():
  """
  Fetch all time zones by evaluating the ``listTimeZones`` expression.

  :return: the time zones (as an H2OFrame)
  """
  tz_expr = ExprNode("listTimeZones")
  return H2OFrame(expr=tz_expr)._frame()
Example #4
0
File: h2o.py Project: moidin/h2o-3
def get_timezone():
  """
  Fetch the time zone of the H2O cloud by evaluating the ``getTimeZone`` expression.

  :return: the time zone (string)
  """
  tz_expr = ExprNode("getTimeZone")
  return H2OFrame(expr=tz_expr)._scalar()
Example #5
0
File: h2o.py Project: moidin/h2o-3
def ls():
  """
  List the keys on an H2O cluster by evaluating the ``ls`` expression.

  :return: The keys in the current H2O instance, converted with ``as_data_frame()``
  """
  keys_frame = H2OFrame(expr=ExprNode("ls"))._frame()
  return keys_frame.as_data_frame()
Example #6
0
def get_frame(frame_id):
    """
    Look up a frame by id and wrap its vecs in an H2OFrame.

    :param frame_id: Id of the frame; it is URL-quoted and appended to ``Frames/``.
    :return: An H2OFrame built from the column labels, vec ids and row count
             found in the first entry of the response's ``frames`` list.
    :raises ValueError: if ``frame_id`` is None.
    """
    if frame_id is None:
        raise ValueError("frame_id must not be None")
    endpoint = "Frames/" + urllib.quote(frame_id)
    frame_json = H2OConnection.get_json(endpoint)["frames"][0]
    labels = [column["label"] for column in frame_json["columns"]]
    named_keys = zip(labels, frame_json["vec_ids"])
    return H2OFrame(vecs=H2OVec.new_vecs(named_keys, frame_json["rows"]))
Example #7
0
File: h2o.py Project: moidin/h2o-3
def ifelse(test,yes,no):
  """
  Semantically equivalent to R's ifelse.
  Driven by the booleans in the test vector, the result interleaves (merges)
  values taken from the yes and no vectors.

  :param test: A "test" H2OFrame
  :param yes:  A "yes" H2OFrame
  :param no:   A "no"  H2OFrame
  :return: An H2OFrame
  """
  node = ExprNode("ifelse", test, yes, no)
  return H2OFrame(expr=node)._frame()
Example #8
0
def ifelse(test, yes, no):
    """
    Semantically equivalent to R's ifelse.
    Driven by the booleans in the test vector, the result interleaves (merges)
    values taken from the yes and no vectors.

    :param test: A Python bool, an H2OVec, or an object exposing ``key()``.
    :param yes:  An int/float, None (sent as NaN), an H2OVec, or an object exposing ``key()``.
    :param no:   Same accepted kinds as ``yes``.
    :return: An H2OFrame
    """

    def _branch_operand(value):
        # Returns (rapids argument text, temporary key to clean up or None).
        if isinstance(value, (int, float)):
            return "#{}".format(str(value)), None
        if value is None:
            return "#NaN", None
        key = value._expr.eager() if isinstance(value, H2OVec) else value.key()
        return "'" + key + "'", key

    # The test operand is resolved first, then yes, then no.
    test_tmp = None
    if isinstance(test, bool):
        test_a = "%TRUE" if test else "%FALSE"
    else:
        test_tmp = test._expr.eager() if isinstance(test, H2OVec) else test.key()
        test_a = "'" + test_tmp + "'"
    yes_a, yes_tmp = _branch_operand(yes)
    no_a, no_tmp = _branch_operand(no)

    result_key = H2OFrame.py_tmp_key()
    rapids("(= !{} (ifelse {} {} {}))".format(result_key, test_a, yes_a, no_a))

    # Fetch the result as JSON and peel the vecs out of its first (only) frame.
    frame_json = frame(result_key)['frames'][0]
    row_count = frame_json['rows']
    vec_ids = frame_json['vec_ids']
    labels = [column['label'] for column in frame_json['columns']]
    vecs = H2OVec.new_vecs(zip(labels, vec_ids), row_count)

    # Drop the result frame first, then whichever operand temporaries were recorded.
    removeFrameShallow(result_key)
    for leftover in (yes_tmp, no_tmp, test_tmp):
        if leftover is not None:
            removeFrameShallow(str(leftover))
    return H2OFrame(vecs=vecs)
Example #9
0
def upload_file(path, destination_key=""):
  """
  Push a dataset from the local machine up to the H2O cluster.

  :param path: A path specifying the location of the data to upload; it is made absolute before posting.
  :param destination_key: The name of the H2O Frame in the H2O Cluster; a temporary key is generated when it is "".
  :return: A new H2OFrame
  """
  upload_info = {"file": os.path.abspath(path)}
  if destination_key == "":
    dest_key = H2OFrame.py_tmp_key()
  else:
    dest_key = destination_key
  H2OConnection.post_json(url_suffix="PostFile", file_upload_info=upload_info, destination_key=dest_key)
  return H2OFrame(text_key=dest_key)
def _check_frame(x,y,response):
  """
  Normalize training data to an H2OFrame and, when a response is given, attach it.

  :param x: An H2OFrame, or a list that is wrapped into one via ``H2OFrame(vecs=x)``.
  :param y: The response H2OVec; skipped entirely when falsy.
  :param response: Object whose ``_name`` is used as the column name under which ``y`` is stored.
  :return: The (possibly newly built) H2OFrame.
  :raises ValueError: if ``x`` or ``y`` has the wrong type, or if a vec in ``x`` already carries ``y``'s name.
  """
  if not isinstance(x, H2OFrame):
    if not isinstance(x, list):
      raise ValueError("`x` must be an H2OFrame or a list of H2OVecs. Got: " + str(type(x)))
    x = H2OFrame(vecs=x)
  if not y:
    return x
  if not isinstance(y, H2OVec):
    raise ValueError("`y` must be an H2OVec. Got: " + str(type(y)))
  # The response must not already be one of the training columns.
  if any(y._name == vec._name for vec in x._vecs):
    raise ValueError("Found response "+y._name+" in training `x` data")
  x[response._name] = y
  return x
Example #11
0
def _simple_un_math_op(op, data):
    """
    Element-wise unary math operation on an H2OFrame or H2OVec.

    :param op: the math operation; passed to ``Expr`` and prefixed to the error message.
    :param data: the H2OFrame or H2OVec object to operate on.
    :return: For an H2OFrame, a new H2OFrame whose vecs are the result of applying
             this function to each vec; for an H2OVec, a new H2OVec with the same
             name wrapping an ``Expr`` for the operation.
    :raises ValueError: if ``data`` is neither an H2OFrame nor an H2OVec.
    """
    if isinstance(data, H2OFrame):
        # Recurse column by column.
        return H2OFrame(
            vecs=[_simple_un_math_op(op, vec) for vec in data._vecs])
    if isinstance(data, H2OVec):
        return H2OVec(data._name, Expr(op, left=data, length=len(data)))
    # Call form of raise is valid on both Python 2 and Python 3; the original
    # comma form is a syntax error on Python 3.
    raise ValueError(op + " only operates on H2OFrame or H2OVec objects")
Example #12
0
def as_list(data, use_pandas=True):
    """
    Turn an H2O data object into a python-side object.

    WARNING: This will pull all data local!

    When pandas can be located (and use_pandas is True), pandas is requested for
    parsing the data frame. Otherwise, a list-of-lists populated by character data
    will be returned (so the types of data will all be str).

    :param data: An H2O data object (H2OFrame, Expr or H2OVec). Any other type
                 falls through and yields None.
    :param use_pandas: Try to use pandas for reading in the data.
    :return: List of list (Rows x Columns).
    """
    # Probe for pandas without importing it.
    try:
        imp.find_module('pandas')
    except ImportError:
        have_pandas = False
    else:
        have_pandas = True
    want_pandas = use_pandas and have_pandas

    if isinstance(data, H2OFrame):
        # Send the frame, convert it, then drop the sent copy.
        sent = H2OFrame.send_frame(data)
        converted = _as_data_frame(sent, want_pandas)
        removeFrameShallow(sent)
        return converted

    if isinstance(data, Expr):
        if data.is_local():
            return data._data
        if data.is_pending():
            data.eager()
            if data.is_local():
                payload = data._data
                if isinstance(payload, list):
                    return [payload]
                return [[payload]]
        return _as_data_frame(data._data, want_pandas)

    if isinstance(data, H2OVec):
        expr = data._expr
        if expr.is_local():
            return expr._data
        if expr.is_pending():
            expr.eager()
            if expr.is_local():
                return [[expr._data]]
        # Otherwise wrap the vec in a one-column frame and convert that.
        return as_list(H2OFrame(vecs=[data]), use_pandas)
Example #13
0
File: h2o.py Project: moidin/h2o-3
def parse_raw(setup, id=None, first_line_is_header=(-1,0,1)):
  """
  Used in conjunction with import_file and parse_setup in order to make alterations before parsing.

  :param setup: Result of h2o.parse_setup
  :param id: An optional id for the frame. When None, ``setup["destination_frame"]`` is used.
  :param first_line_is_header: -1,0,1 if the first line is to be used as the header
  :return: An H2OFrame object whose row count, column names, column count and id
           are filled in from the parse result.
  """
  # Honor a caller-supplied id; previously it was always overwritten by the
  # destination frame from `setup`, making the parameter a no-op.
  if id is None:
    id = setup["destination_frame"]
  fr = H2OFrame()
  parsed = parse(setup, id, first_line_is_header)
  fr._nrows = parsed['rows']
  fr._col_names = parsed['column_names']
  fr._ncols = len(fr._col_names)
  fr._computed = True
  fr._id = id
  return fr
Example #14
0
def parse_raw(setup, id=None, first_line_is_header=(-1, 0, 1)):
    """
    Used together with import_file and parse_setup so that alterations can be made before parsing.

    :param setup: Result of h2o.parse_setup
    :param id: An optional id for the frame; a temporary key is generated when None.
    :param first_line_is_header: -1,0,1 if the first line is to be used as the header
    :return: An H2OFrame object
    """
    if id is None:
        id = H2OFrame.py_tmp_key()
    parsed = parse(setup, id, first_line_is_header)
    vec_ids = parsed['vec_ids']
    row_count = parsed['rows']
    names = parsed['column_names']
    if not names:
        # No column names came back: fall back to C1, C2, ... one per vec.
        names = ["C" + str(i) for i in range(1, len(vec_ids) + 1)]
    return H2OFrame(vecs=H2OVec.new_vecs(zip(names, vec_ids), row_count))
Example #15
0
def ls():
    """
    List Keys on an H2O Cluster

    Runs the rapids ``ls`` expression into a temporary key, wraps the resulting
    vecs in an H2OFrame with its column renamed to ``keys``, prints a heading
    and shows the frame, then converts it to a python object.

    :return: Returns a list of keys in the current H2O instance
    """
    tmp_key = H2OFrame.py_tmp_key()
    rapids("(= !{} (ls ))".format(tmp_key))

    # Fetch the result as JSON and peel the vecs out of its first frame.
    frame_json = frame(tmp_key)['frames'][0]
    rows = frame_json['rows']
    veckeys = frame_json['vec_ids']
    colnames = [col['label'] for col in frame_json['columns']]

    keys_frame = H2OFrame(vecs=H2OVec.new_vecs(zip(colnames, veckeys), rows))
    keys_frame.setNames(["keys"])
    # Parenthesized single-argument print emits the same text on Python 2 and 3;
    # the bare print statement is a syntax error on Python 3.
    print("First 10 Keys: ")
    keys_frame.show()
    return as_list(keys_frame, use_pandas=False)
Example #16
0
def cbind(left, right):
    """
    Column-bind two H2O objects into a new frame.

    :param left: H2OFrame or H2OVec
    :param right: H2OFrame or H2OVec
    :return: new H2OFrame with left|right cbinded; its columns are named after
             the input vecs, left first.
    :raises ValueError: if either argument is neither an H2OFrame nor an H2OVec.
    """

    def _as_vecs(operand):
        # Flatten a frame to its vecs; a lone vec becomes a one-element list.
        if isinstance(operand, H2OFrame):
            return list(operand._vecs)
        if isinstance(operand, H2OVec):
            return [operand]
        raise ValueError("left and right data must be H2OVec or H2OFrame")

    vecs = _as_vecs(left) + _as_vecs(right)
    names = [vec.name() for vec in vecs]

    # Build and run the rapids cbind over the evaluated key of every vec.
    tmp_key = H2OFrame.py_tmp_key()
    rapids_expr = "(= !" + tmp_key + " (cbind %FALSE %"
    rapids_expr += " %".join([vec._expr.eager() for vec in vecs]) + "))"
    rapids(rapids_expr)

    # Fetch the result as JSON and peel the vecs out of its first frame.
    frame_json = frame(tmp_key)['frames'][0]
    rows = frame_json['rows']
    vec_ids = frame_json['vec_ids']
    colnames = [col['label'] for col in frame_json['columns']]
    result = H2OFrame(vecs=H2OVec.new_vecs(zip(colnames, vec_ids), rows))
    # Restore the original vec names on the bound result.
    result.setNames(names)
    return result
Example #17
0
File: h2o.py Project: moidin/h2o-3
def which(condition):
  """
  Evaluate the ``h2o.which`` expression on a condition.

  :param condition: A conditional statement.
  :return: A H2OFrame of 1 column filled with 0-based indices for which the condition is True
  """
  node = ExprNode("h2o.which", condition, False)
  return H2OFrame(expr=node)._frame()