Beispiel #1
0
def import_frame(path=None, vecs=None):
  """
  Build an H2OFrame either from existing vecs or from a remote file name.

  :param path: Passed to H2OFrame as ``remote_fname`` when no vecs are supplied.
  :param vecs: When truthy, the frame is built directly from these vecs and ``path`` is not used.
  :return: A new H2OFrame
  """
  if vecs:
    return H2OFrame(vecs=vecs)
  return H2OFrame(remote_fname=path)
Beispiel #2
0
def import_frame(path=None, vecs=None):
    """
    Import a frame from a file (remote or local machine), or wrap existing vecs.

    :param path: A path specifying the location of the data to import; handed
        to H2OFrame as ``remote_fname`` when ``vecs`` is not supplied.
    :param vecs: When truthy, the frame is built from these vecs and ``path``
        is not used.
    :return: A new H2OFrame
    """
    if vecs:
        return H2OFrame(vecs=vecs)
    return H2OFrame(remote_fname=path)
Beispiel #3
0
def ifelse(test, yes, no):
    """
    Semantically equivalent to R's ifelse.

    Based on the booleans in the test vector, the output has the values of the
    yes and no vectors interleaved (or merged together).

    :param test: A "test" H2OFrame; an H2OVec or a plain bool is also handled.
    :param yes:  A "yes" H2OFrame; an H2OVec, an int/float, or None (sent as
        NaN) is also handled.
    :param no:   A "no"  H2OFrame; same alternatives as ``yes``.
    :return: An H2OFrame
    """
    def _remote_key(obj):
        # H2OVecs go through eager() on their expression; anything else is
        # asked for its key().
        if isinstance(obj, H2OVec):
            return obj._expr.eager()
        return obj.key()

    def _branch_arg(value):
        # Returns (rapids argument text, key to remove afterwards or None).
        if isinstance(value, (int, float)):
            return "#{}".format(str(value)), None
        if value is None:
            return "#NaN", None
        key = _remote_key(value)
        return "'" + key + "'", key

    # Arguments are resolved in the order test, yes, no.
    if isinstance(test, bool):
        test_key = None
        test_arg = "%TRUE" if test else "%FALSE"
    else:
        test_key = _remote_key(test)
        test_arg = "'" + test_key + "'"
    yes_arg, yes_key = _branch_arg(yes)
    no_arg, no_key = _branch_arg(no)

    out_key = H2OFrame.py_tmp_key()
    rapids("(= !{} (ifelse {} {} {}))".format(out_key, test_arg, yes_arg,
                                               no_arg))
    meta = frame(out_key)['frames'][0]  # only the first frame is used
    nrows = meta['rows']
    vec_ids = meta['vec_ids']
    labels = [column['label'] for column in meta['columns']]
    # Peel the vecs out of the returned frame before dropping its key.
    vecs = H2OVec.new_vecs(zip(labels, vec_ids), nrows)

    removeFrameShallow(out_key)
    for key in (yes_key, no_key, test_key):
        if key is not None:
            removeFrameShallow(str(key))
    return H2OFrame(vecs=vecs)
Beispiel #4
0
def upload_file(path, destination_key=""):
  """
  Upload a dataset at the path given from the local machine to the H2O cluster.

  :param path: A path specifying the location of the data to upload; it is made absolute before posting.
  :param destination_key: The name of the H2O Frame in the H2O Cluster. When left as the empty
    string, a key from H2OFrame.py_tmp_key() is used instead.
  :return: A new H2OFrame
  """
  upload_info = {"file": os.path.abspath(path)}
  if destination_key == "":
    target_key = H2OFrame.py_tmp_key()
  else:
    target_key = destination_key
  H2OConnection.post_json(url_suffix="PostFile", file_upload_info=upload_info, destination_key=target_key)
  return H2OFrame(text_key=target_key)
Beispiel #5
0
def get_frame(frame_id):
  """
  Obtain a handle to the frame in H2O with the frame_id key.

  :param frame_id: Key of the frame; forwarded unchanged to H2OFrame.get_frame.
  :return: An H2OFrame
  """
  handle = H2OFrame.get_frame(frame_id)
  return handle
Beispiel #6
0
def get_timezone():
  """
  Get the Time Zone on the H2O Cloud

  Evaluates the "getTimeZone" expression and returns its scalar result.

  :return: the time zone (string)
  """
  tz_query = ExprNode("getTimeZone")
  return H2OFrame(expr=tz_query)._scalar()
Beispiel #7
0
def ls():
  """
  List Keys on an H2O Cluster

  Evaluates the "ls" expression as a frame and converts it with as_data_frame().

  :return: Returns the keys in the current H2O instance, as produced by as_data_frame()
  """
  listing = H2OFrame(expr=ExprNode("ls"))._frame()
  return listing.as_data_frame()
Beispiel #8
0
def get_frame(frame_id):
  """
  Obtain a handle to the frame in H2O with the frame_id key.

  :param frame_id: Key of the frame; forwarded unchanged to H2OFrame.get_frame.
  :return: An H2OFrame
  """
  handle = H2OFrame.get_frame(frame_id)
  return handle
Beispiel #9
0
def list_timezones():
  """
  Get a list of all the timezones

  Evaluates the "listTimeZones" expression as a frame.

  :return: the time zones (as an H2OFrame)
  """
  tz_listing = ExprNode("listTimeZones")
  return H2OFrame(expr=tz_listing)._frame()
Beispiel #10
0
def as_list(data, use_pandas=True):
    """
    Convert an H2O data object into a python-specific object.

    WARNING: This will pull all data local!

    If Pandas is available (and use_pandas is True), then pandas will be used
    to parse the data frame. Otherwise, a list-of-lists populated by character
    data will be returned (so the types of data will all be str).

    Inputs that are not an H2OFrame, Expr or H2OVec fall through and yield None.

    :param data: An H2O data object.
    :param use_pandas: Try to use pandas for reading in the data.
    :return: List of list (Rows x Columns).
    """
    # Probe for pandas without importing it.
    try:
        imp.find_module('pandas')
    except ImportError:
        have_pandas = False
    else:
        have_pandas = True
    want_pandas = use_pandas and have_pandas

    # Frame: send it, download it, then drop the key that was sent.
    if isinstance(data, H2OFrame):
        sent_key = H2OFrame.send_frame(data)
        result = _as_data_frame(sent_key, want_pandas)
        removeFrameShallow(sent_key)
        return result

    if isinstance(data, Expr):
        if data.is_local():
            return data._data
        if data.is_pending():
            data.eager()
            if data.is_local():
                # After evaluation a local result is wrapped so that the
                # caller still receives a list of lists.
                if isinstance(data._data, list):
                    return [data._data]
                return [[data._data]]
        return _as_data_frame(data._data, want_pandas)

    if isinstance(data, H2OVec):
        expr = data._expr
        if expr.is_local():
            return expr._data
        if expr.is_pending():
            expr.eager()
            if expr.is_local():
                return [[expr._data]]
        # Otherwise treat the vec as a one-column frame and recurse.
        return as_list(H2OFrame(vecs=[data]), use_pandas)
Beispiel #11
0
def parse_raw(setup, id=None, first_line_is_header=(-1, 0, 1)):
    """
    Used in conjunction with import_file and parse_setup in order to make
    alterations before parsing.

    :param setup: Result of h2o.parse_setup
    :param id: An optional id for the frame; when None a key from
        H2OFrame.py_tmp_key() is used.
    :param first_line_is_header: -1,0,1 if the first line is to be used as the header
    :return: An H2OFrame object
    """
    frame_id = H2OFrame.py_tmp_key() if id is None else id
    parsed = parse(setup, frame_id, first_line_is_header)
    vec_ids = parsed['vec_ids']
    nrows = parsed['rows']
    names = parsed['column_names']
    if not names:
        # No column names came back: fall back to C1, C2, ... Cn.
        names = ["C" + str(i) for i in range(1, len(vec_ids) + 1)]
    return H2OFrame(vecs=H2OVec.new_vecs(zip(names, vec_ids), nrows))
Beispiel #12
0
def get_frame(frame_id):
    """
    Fetch the frame stored under ``frame_id`` and wrap it in an H2OFrame.

    :param frame_id: Key of the frame; it is URL-quoted and appended to the
        "Frames/" endpoint.
    :return: An H2OFrame built from the first frame in the JSON response.
    :raises ValueError: if ``frame_id`` is None.
    """
    if frame_id is None:
        raise ValueError("frame_id must not be None")
    endpoint = "Frames/" + urllib.quote(frame_id)
    meta = H2OConnection.get_json(endpoint)["frames"][0]
    labels = [column["label"] for column in meta["columns"]]
    vecs = H2OVec.new_vecs(zip(labels, meta["vec_ids"]), meta["rows"])
    return H2OFrame(vecs=vecs)
Beispiel #13
0
def ls():
    """
    List Keys on an H2O Cluster.

    Runs the rapids ``ls`` expression into a temporary key, wraps the
    resulting vecs in an H2OFrame whose columns are renamed to ``["keys"]``,
    shows that frame, and returns its contents via ``as_list``.

    :return: Returns a list of keys in the current H2O instance
    """
    tmp_key = H2OFrame.py_tmp_key()
    expr = "(= !{} (ls ))".format(tmp_key)
    rapids(expr)
    j = frame(tmp_key)  # fetch the frame description as JSON
    fr = j['frames'][0]  # only the first frame is used
    rows = fr['rows']
    veckeys = fr['vec_ids']
    cols = fr['columns']
    colnames = [col['label'] for col in cols]
    vecs = H2OVec.new_vecs(zip(colnames, veckeys), rows)
    fr = H2OFrame(vecs=vecs)
    fr.setNames(["keys"])
    # A parenthesised single-argument print is valid both as a Python 2
    # print statement and as a Python 3 function call.
    print("First 10 Keys: ")
    fr.show()
    return as_list(fr, use_pandas=False)
Beispiel #14
0
def export_file(frame,path,force=False):
  """
  Export a given H2OFrame to a path on the machine this python session is currently connected to.
  To view the current session, call h2o.cluster_info().

  :param frame: The Frame to save to disk.
  :param path: The path to the save point on disk; it is inserted into the request URL as given.
  :param force: Overwrite any preexisting file with the same path
  :return: None
  """
  frame_key = H2OFrame.send_frame(frame)
  if force:
    overwrite = "true"
  else:
    overwrite = "false"
  endpoint = "Frames/" + str(frame_key) + "/export/" + path + "/overwrite/" + overwrite
  H2OConnection.get_json(endpoint)
Beispiel #15
0
def ifelse(test,yes,no):
  """
  Semantically equivalent to R's ifelse.

  Based on the booleans in the test vector, the output has the values of the yes and no
  vectors interleaved (or merged together). Implemented as the "ifelse" expression node
  evaluated as a frame.

  :param test: A "test" H2OFrame
  :param yes:  A "yes" H2OFrame
  :param no:   A "no"  H2OFrame
  :return: An H2OFrame
  """
  node = ExprNode("ifelse", test, yes, no)
  return H2OFrame(expr=node)._frame()
Beispiel #16
0
def upload_file(path, destination_frame=""):
  """
  Upload a dataset at the path given from the local machine to the H2O cluster.

  :param path: A path specifying the location of the data to upload; it is made absolute before posting.
  :param destination_frame: The name of the H2O Frame in the H2O Cluster. When left as the empty
    string, a key from H2OFrame.py_tmp_key() is used instead.
  :return: A new H2OFrame
  """
  upload_info = {"file": os.path.abspath(path)}
  if destination_frame == "":
    target = H2OFrame.py_tmp_key()
  else:
    target = destination_frame
  H2OConnection.post_json(url_suffix="PostFile", file_upload_info=upload_info, destination_frame=target)
  return H2OFrame(text_key=target)
Beispiel #17
0
def ifelse(test,yes,no):
  """
  Semantically equivalent to R's ifelse.

  Based on the booleans in the test vector, the output has the values of the yes and no
  vectors interleaved (or merged together).

  :param test: A "test" H2OFrame; an H2OVec or a plain bool is also handled.
  :param yes:  A "yes" H2OFrame; an H2OVec, an int/float, or None (sent as NaN) is also handled.
  :param no:   A "no"  H2OFrame; same alternatives as ``yes``.
  :return: An H2OFrame
  """
  def _remote_key(obj):
    # H2OVecs go through eager() on their expression; anything else is asked for its key().
    if isinstance(obj, H2OVec):
      return obj._expr.eager()
    return obj.key()

  def _branch_arg(value):
    # Returns (rapids argument text, key to remove afterwards or None).
    if isinstance(value, (int, float)):
      return "#{}".format(str(value)), None
    if value is None:
      return "#NaN", None
    key = _remote_key(value)
    return "'" + key + "'", key

  # Arguments are resolved in the order test, yes, no.
  if isinstance(test, bool):
    test_key = None
    test_arg = "%TRUE" if test else "%FALSE"
  else:
    test_key = _remote_key(test)
    test_arg = "'" + test_key + "'"
  yes_arg, yes_key = _branch_arg(yes)
  no_arg, no_key = _branch_arg(no)

  out_key = H2OFrame.py_tmp_key()
  rapids("(= !{} (ifelse {} {} {}))".format(out_key, test_arg, yes_arg, no_arg))
  meta = frame(out_key)['frames'][0]  # only the first frame is used
  nrows = meta['rows']
  vec_ids = meta['vec_ids']
  labels = [column['label'] for column in meta['columns']]
  # Peel the vecs out of the returned frame before dropping its key.
  vecs = H2OVec.new_vecs(zip(labels, vec_ids), nrows)

  removeFrameShallow(out_key)
  for key in (yes_key, no_key, test_key):
    if key is not None:
      removeFrameShallow(str(key))
  return H2OFrame(vecs=vecs)
def _check_frame(x,y,response):
  """
  Normalise training data to an H2OFrame and, when a response is given, attach it.

  :param x: An H2OFrame, or a list that is wrapped via H2OFrame(vecs=x).
  :param y: Optional H2OVec; skipped entirely when falsy.
  :param response: Object whose ``_name`` is the column name under which ``y`` is stored in ``x``.
  :return: The (possibly newly built) H2OFrame.
  :raises ValueError: if ``x`` is neither an H2OFrame nor a list, if ``y`` is truthy but not an
    H2OVec, or if a vec in ``x`` already has the same name as ``y``.
  """
  if not isinstance(x, H2OFrame):
    if not isinstance(x, list):
      raise ValueError("`x` must be an H2OFrame or a list of H2OVecs. Got: " + str(type(x)))
    x = H2OFrame(vecs=x)

  if not y:
    return x

  if not isinstance(y, H2OVec):
    raise ValueError("`y` must be an H2OVec. Got: " + str(type(y)))
  # The response must not already be one of the predictor columns.
  if any(y._name == vec._name for vec in x._vecs):
    raise ValueError("Found response "+y._name+" in training `x` data")
  x[response._name] = y
  return x
Beispiel #19
0
def export_file(frame, path, force=False):
    """
    Export a given H2OFrame to a path on the machine this python session is
    currently connected to. To view the current session, call
    h2o.cluster_info().

    :param frame: The Frame to save to disk.
    :param path: The path to the save point on disk; it is inserted into the
        request URL as given.
    :param force: Overwrite any preexisting file with the same path
    :return: None
    """
    frame_key = H2OFrame.send_frame(frame)
    if force:
        overwrite = "true"
    else:
        overwrite = "false"
    endpoint = ("Frames/" + str(frame_key) + "/export/" + path +
                "/overwrite/" + overwrite)
    H2OConnection.get_json(endpoint)
Beispiel #20
0
def _simple_un_math_op(op, data):
    """
    Element-wise math operations on H2OFrame and H2OVec.

    An H2OFrame is handled by applying the operation to each of its vecs and
    wrapping the results in a new H2OFrame; an H2OVec yields a new H2OVec
    with the same name backed by an ``Expr`` for the operation.

    :param op: the math operation (a string; it is also used in the error message)
    :param data: the H2OFrame or H2OVec object to operate on.
    :return: H2OFrame or H2OVec, with lazy operation
    :raises ValueError: if ``data`` is neither an H2OFrame nor an H2OVec.
    """
    if isinstance(data, H2OFrame):
        return H2OFrame(
            vecs=[_simple_un_math_op(op, vec) for vec in data._vecs])
    if isinstance(data, H2OVec):
        return H2OVec(data._name, Expr(op, left=data, length=len(data)))
    # Call-style raise is valid on both Python 2 and Python 3; the former
    # `raise ValueError, msg` form is a SyntaxError on Python 3.
    raise ValueError(op + " only operates on H2OFrame or H2OVec objects")
Beispiel #21
0
def cbind(left, right):
    """
    Column-bind two H2O objects into a new frame.

    The column names of the result are taken from the input vecs, in
    left-then-right order.

    :param left: H2OFrame or H2OVec
    :param right: H2OFrame or H2OVec
    :return: new H2OFrame with left|right cbinded
    :raises ValueError: if either argument is neither an H2OVec nor an H2OFrame.
    """
    # Collect the vecs of both sides, left first.
    if isinstance(left, H2OFrame) and isinstance(right, H2OFrame):
        vecs = left._vecs + right._vecs
    elif isinstance(left, H2OFrame) and isinstance(right, H2OVec):
        vecs = list(left._vecs)
        vecs.append(right)
    elif isinstance(left, H2OVec) and isinstance(right, H2OVec):
        vecs = [left, right]
    elif isinstance(left, H2OVec) and isinstance(right, H2OFrame):
        vecs = [left]
        vecs.extend(right._vecs)
    else:
        raise ValueError("left and right data must be H2OVec or H2OFrame")
    names = [vec.name() for vec in vecs]

    out_key = H2OFrame.py_tmp_key()
    expr = "(= !" + out_key + " (cbind %FALSE %"
    expr += " %".join([vec._expr.eager() for vec in vecs]) + "))"
    rapids(expr)

    meta = frame(out_key)['frames'][0]  # only the first frame is used
    nrows = meta['rows']
    vec_ids = meta['vec_ids']
    labels = [column['label'] for column in meta['columns']]
    result = H2OFrame(vecs=H2OVec.new_vecs(zip(labels, vec_ids), nrows))
    result.setNames(names)
    return result
Beispiel #22
0
def as_list(data, use_pandas=True):
  """
  Convert an H2O data object into a python-specific object.

  WARNING: This will pull all data local!

  If Pandas is available (and use_pandas is True), then pandas will be used to parse the data frame.
  Otherwise, a list-of-lists populated by character data will be returned (so the types of data will
  all be str).

  The conversion itself is delegated to H2OFrame.as_data_frame.

  :param data: An H2O data object.
  :param use_pandas: Try to use pandas for reading in the data.
  :return: List of list (Rows x Columns).
  """
  converted = H2OFrame.as_data_frame(data, use_pandas)
  return converted
Beispiel #23
0
def as_list(data, use_pandas=True):
  """
  Convert an H2O data object into a python-specific object.

  WARNING: This will pull all data local!

  If Pandas is available (and use_pandas is True), then pandas will be used to parse the data frame.
  Otherwise, a list-of-lists populated by character data will be returned (so the types of data will
  all be str).

  The conversion itself is delegated to H2OFrame.as_data_frame.

  :param data: An H2O data object.
  :param use_pandas: Try to use pandas for reading in the data.
  :return: List of list (Rows x Columns).
  """
  converted = H2OFrame.as_data_frame(data, use_pandas)
  return converted
Beispiel #24
0
def parse_raw(setup, id=None, first_line_is_header=(-1,0,1)):
  """
  Used in conjunction with import_file and parse_setup in order to make alterations before parsing.

  :param setup: Result of h2o.parse_setup
  :param id: An optional id for the frame; when None a key from H2OFrame.py_tmp_key() is used.
  :param first_line_is_header: -1,0,1 if the first line is to be used as the header
  :return: An H2OFrame object
  """
  frame_id = H2OFrame.py_tmp_key() if id is None else id
  parsed = parse(setup, frame_id, first_line_is_header)
  vec_ids = parsed['vec_ids']
  nrows = parsed['rows']
  names = parsed['column_names']
  if not names:
    # No column names came back: fall back to C1, C2, ... Cn.
    names = ["C" + str(i) for i in range(1, len(vec_ids) + 1)]
  return H2OFrame(vecs=H2OVec.new_vecs(zip(names, vec_ids), nrows))
Beispiel #25
0
def rep_len(data, length_out):
  """
  Apply ``rep_len`` to a scalar or to an H2O object.

  For a str or int, the rapids expression ``(rep_len <scalar> #<length_out>)`` is evaluated into a
  temporary key and the resulting vecs are wrapped in a new H2OFrame. Any other input is
  delegated to its own ``rep_len`` method.

  :param data: A str, an int, or an object exposing ``rep_len(length_out=...)``.
  :param length_out: Value passed as the length argument of ``rep_len``.
  :return: An H2OFrame for scalar input; otherwise whatever ``data.rep_len`` returns.
  """
  if not isinstance(data, (str, int)):
    return data.rep_len(length_out=length_out)

  out_key = H2OFrame.py_tmp_key()
  # Ints are sent as #<n>; strings are sent wrapped in double quotes.
  if isinstance(data, int):
    literal = '#{}'.format(data)
  else:
    literal = '\"{}\"'.format(data)
  length_arg = '#{}'.format(length_out)
  rapids("(= !{} (rep_len {} {}))".format(out_key, literal, length_arg))

  meta = frame(out_key)['frames'][0]  # only the first frame is used
  nrows = meta['rows']
  vec_ids = meta['vec_ids']
  labels = [column['label'] for column in meta['columns']]
  vecs = H2OVec.new_vecs(zip(labels, vec_ids), nrows)
  removeFrameShallow(out_key)
  return H2OFrame(vecs=vecs)
Beispiel #26
0
def parse_raw(setup, id=None, first_line_is_header=(-1,0,1)):
  """
  Used in conjunction with import_file and parse_setup in order to make alterations before parsing.

  :param setup: Result of h2o.parse_setup; its "destination_frame" entry is used as the frame id.
  :param id: An optional id for the frame. NOTE: the value passed here is overwritten by
    setup["destination_frame"] before it is used.
  :param first_line_is_header: -1,0,1 if the first line is to be used as the header
  :return: An H2OFrame object
  """
  frame_id = setup["destination_frame"]
  result = H2OFrame()
  parsed = parse(setup, frame_id, first_line_is_header)
  # Fill in the frame's metadata directly from the parse response.
  result._nrows = parsed['rows']
  result._col_names = parsed['column_names']
  result._ncols = len(result._col_names)
  result._computed = True
  result._id = frame_id
  return result
Beispiel #27
0
def as_list(data, use_pandas=True):
  """
  Convert an H2O data object into a python-specific object.

  WARNING: This will pull all data local!

  If Pandas is available (and use_pandas is True), then pandas will be used to parse the data frame.
  Otherwise, a list-of-lists populated by character data will be returned (so the types of data will
  all be str).

  Inputs that are not an H2OFrame, Expr or H2OVec fall through and yield None.

  :param data: An H2O data object.
  :param use_pandas: Try to use pandas for reading in the data.
  :return: List of list (Rows x Columns).
  """
  # Probe for pandas without importing it.
  try:
    imp.find_module('pandas')
  except ImportError:
    have_pandas = False
  else:
    have_pandas = True
  want_pandas = use_pandas and have_pandas

  # Frame: send it, download it, then drop the key that was sent.
  if isinstance(data, H2OFrame):
    sent_key = H2OFrame.send_frame(data)
    result = _as_data_frame(sent_key, want_pandas)
    removeFrameShallow(sent_key)
    return result

  if isinstance(data, Expr):
    if data.is_local():
      return data._data
    if data.is_pending():
      data.eager()
      if data.is_local():
        # After evaluation a local result is wrapped so that the caller still receives a list of lists.
        if isinstance(data._data, list):
          return [data._data]
        return [[data._data]]
    return _as_data_frame(data._data, want_pandas)

  if isinstance(data, H2OVec):
    expr = data._expr
    if expr.is_local():
      return expr._data
    if expr.is_pending():
      expr.eager()
      if expr.is_local():
        return [[expr._data]]
    # Otherwise treat the vec as a one-column frame and recurse.
    return as_list(H2OFrame(vecs=[data]), use_pandas)
Beispiel #28
0
def ls():
  """
  List Keys on an H2O Cluster.

  Runs the rapids ``ls`` expression into a temporary key, wraps the resulting vecs in an
  H2OFrame, shows that frame, and returns its contents via ``as_list``.

  :return: Returns a list of keys in the current H2O instance
  """
  tmp_key = H2OFrame.py_tmp_key()
  expr = "(= !{} (ls ))".format(tmp_key)
  rapids(expr)
  j = frame(tmp_key)  # fetch the frame description as JSON
  fr = j['frames'][0]  # only the first frame is used
  rows = fr['rows']
  veckeys = fr['vec_ids']
  cols = fr['columns']
  colnames = [col['label'] for col in cols]
  vecs=H2OVec.new_vecs(zip(colnames, veckeys), rows)
  fr = H2OFrame(vecs=vecs)
  # A parenthesised single-argument print is valid both as a Python 2 print statement
  # and as a Python 3 function call.
  print("First 10 Keys: ")
  fr.show()
  return as_list(fr, use_pandas=False)
Beispiel #29
0
def remove(object):
  """
  Remove object from H2O. This is a "hard" delete of the object. It removes all subparts.

  An H2OFrame is sent first and the resulting key is removed, after which the frame's vec list
  is emptied. An H2OVec is deleted by its key and its expression is cleared. Anything else is
  treated as a key and deleted from "DKV/" directly.

  :param object: The object pointing to the object to be removed.
  :return: None
  :raises ValueError: if ``object`` is None.
  """
  if object is None:
    raise ValueError("remove with no object is not supported, for your protection")

  if isinstance(object, H2OFrame):
    remove(H2OFrame.send_frame(object))
    object._vecs = []
    return

  if isinstance(object, H2OVec):
    H2OConnection.delete("DKV/" + str(object.key()))
    object._expr = None
    return

  H2OConnection.delete("DKV/" + object)
Beispiel #30
0
def remove(object):
    """
    Remove object from H2O. This is a "hard" delete of the object. It removes
    all subparts.

    An H2OFrame is sent first and the resulting key is removed, after which
    the frame's vec list is emptied. An H2OVec is deleted by its key and its
    expression is cleared. Anything else is treated as a key and deleted from
    "DKV/" directly.

    :param object: The object pointing to the object to be removed.
    :return: None
    :raises ValueError: if ``object`` is None.
    """
    if object is None:
        raise ValueError(
            "remove with no object is not supported, for your protection")

    if isinstance(object, H2OFrame):
        remove(H2OFrame.send_frame(object))
        object._vecs = []
        return

    if isinstance(object, H2OVec):
        H2OConnection.delete("DKV/" + str(object.key()))
        object._expr = None
        return

    H2OConnection.delete("DKV/" + object)
Beispiel #31
0
def cbind(left,right):
  """
  Column-bind two H2O objects into a new frame.

  The column names of the result are taken from the input vecs, in left-then-right order.

  :param left: H2OFrame or H2OVec
  :param right: H2OFrame or H2OVec
  :return: new H2OFrame with left|right cbinded
  :raises ValueError: if either argument is neither an H2OVec nor an H2OFrame.
  """
  # Collect the vecs of both sides, left first.
  if isinstance(left, H2OFrame) and isinstance(right, H2OFrame):
    vecs = left._vecs + right._vecs
  elif isinstance(left, H2OFrame) and isinstance(right, H2OVec):
    vecs = list(left._vecs)
    vecs.append(right)
  elif isinstance(left, H2OVec) and isinstance(right, H2OVec):
    vecs = [left, right]
  elif isinstance(left, H2OVec) and isinstance(right, H2OFrame):
    vecs = [left]
    vecs.extend(right._vecs)
  else:
    raise ValueError("left and right data must be H2OVec or H2OFrame")
  names = [vec.name() for vec in vecs]

  out_key = H2OFrame.py_tmp_key()
  expr = "(= !" + out_key + " (cbind %FALSE %"
  expr += " %".join([vec._expr.eager() for vec in vecs]) + "))"
  rapids(expr)

  meta = frame(out_key)['frames'][0]  # only the first frame is used
  nrows = meta['rows']
  vec_ids = meta['vec_ids']
  labels = [column['label'] for column in meta['columns']]
  result = H2OFrame(vecs=H2OVec.new_vecs(zip(labels, vec_ids), nrows))
  result.setNames(names)
  return result
Beispiel #32
0
def which(condition):
  """
  Evaluate the "h2o.which" expression for a condition.

  :param condition: A conditional statement.
  :return: A H2OFrame of 1 column filled with 0-based indices for which the condition is True
  """
  node = ExprNode("h2o.which", condition, False)
  return H2OFrame(expr=node)._frame()
Beispiel #33
0
def export_file(frame,path,force=False):
  """
  Send a frame and request its export to ``path``.

  :param frame: The frame to export; it is first passed to H2OFrame.send_frame.
  :param path: Destination path; it is inserted into the request URL as given.
  :param force: When truthy the request is made with overwrite "true", otherwise "false".
  :return: None
  """
  frame_key = H2OFrame.send_frame(frame)
  if force:
    overwrite = "true"
  else:
    overwrite = "false"
  endpoint = "Frames/" + str(frame_key) + "/export/" + path + "/overwrite/" + overwrite
  H2OConnection.get_json(endpoint)