Esempio n. 1
0
File: h2o.py Progetto: dts3/h2o-3
def save_model(model, dir="", name="", filename="", force=False):
    """
    Ask H2O to write a model to disk and return the path that was requested.

    According to the original documentation, ``force=True`` overwrites an
    existing file and the operation fails otherwise.

    :param model: model object; its id is read from ``model._model_json["model_id"]["name"]``.
    :param dir: directory to write to; an empty string selects ``os.getcwd()``.
    :param name: file name; an empty string selects the model id.
    :param filename: full path to the file; when non-empty it is used instead of ``dir``/``name``.
    :param force: bool, sent to the server as 0/1.
    :return: the path of the model (string)
    :raises ValueError: if ``dir``, ``name`` or ``filename`` is not a ``str`` or ``force`` is not a ``bool``.
    """
    # The checks run in argument order, each default being filled in right after its check.
    if not isinstance(dir, str):
        raise ValueError("`dir` must be a character string")
    target_dir = dir if dir != "" else os.getcwd()

    if not isinstance(name, str):
        raise ValueError("`name` must be a character string")
    target_name = name if name != "" else model._model_json["model_id"]["name"]

    if not isinstance(filename, str):
        raise ValueError("`filename` must be a character string")
    if not isinstance(force, bool):
        raise ValueError("`force` must be True or False")

    if filename != "":
        path = filename
    else:
        path = os.path.join(target_dir, target_name)

    model_id = model._model_json["model_id"]["name"]
    H2OConnection.get("Models.bin/" + model_id, dir=path, force=int(force), _rest_version=99)
    return path
Esempio n. 2
0
def download_all_logs(dirname=".", filename=None):
    """
    Download the H2O logs from the connected node and write them to disk.

    :param dirname: (Optional) A character string indicating the directory that the log file should be saved in.
      The directory is created if it does not exist.
    :param filename: (Optional) A string indicating the name of the file to write. When omitted, the name is
      taken from the ``filename=`` part of the response headers.
    :return: path of logs written (as a string)
    :raises ValueError: if no ``filename`` is given and none can be found in the response headers.
    """
    url = 'http://' + H2OConnection.ip() + ':' + str(
        H2OConnection.port()) + '/Logs/download'
    response = urllib2.urlopen(url)
    try:
        if not os.path.exists(dirname): os.mkdir(dirname)
        if filename is None:
            for h in response.headers.headers:
                if 'filename=' in h:
                    filename = h.split("filename=")[1].strip()
                    break
        if filename is None:
            # Fail with a clear message instead of an obscure error from os.path.join.
            raise ValueError("Could not determine the log file name from the response headers; "
                             "pass `filename` explicitly")
        path = os.path.join(dirname, filename)

        # Reuse the response already opened above rather than downloading the logs a
        # second time, and write in binary mode so the payload is stored byte-for-byte
        # (text mode would translate line endings on some platforms).
        with open(path, 'wb') as f:
            f.write(response.read())
    finally:
        response.close()

    print("Writing H2O logs to " + path)
    return path
Esempio n. 3
0
def remove(key):
  """
  Remove a key from H2O by sending a delete request to the "Remove" endpoint.

  :param key: The key pointing to the object to be removed.
  :return: None
  """
  request_params = {"key": key}
  H2OConnection.delete("Remove", request_params)
Esempio n. 4
0
def remove_all():
    """
    Remove all objects from H2O by sending a delete request to the "DKV" endpoint.

    :return: None
    """
    endpoint = "DKV"
    H2OConnection.delete(endpoint)
Esempio n. 5
0
def _resolve_model(future_model, **kwargs):
    """
    Poll a pending model build and wrap the finished model in the matching class.

    :param future_model: object exposing ``poll()`` and ``job.dest_key``.
    :param kwargs: only ``_rest_version`` is read; when present it is forwarded to the
      "Models/<key>" request.
    :return: a model object selected by the model's ``output.model_category``.
    :raises NotImplementedError: for a model category not handled here.
    """
    future_model.poll()

    endpoint = "Models/" + future_model.job.dest_key
    if "_rest_version" in kwargs:
        response = H2OConnection.get_json(endpoint, _rest_version=kwargs["_rest_version"])
    else:
        response = H2OConnection.get_json(endpoint)
    model_json = response["models"][0]

    category = model_json["output"]["model_category"]
    if category == "Binomial":
        return H2OBinomialModel(future_model.job.dest_key, model_json)
    if category == "Clustering":
        return H2OClusteringModel(future_model.job.dest_key, model_json)
    if category == "Regression":
        return H2ORegressionModel(future_model.job.dest_key, model_json)
    if category == "Multinomial":
        return H2OMultinomialModel(future_model.job.dest_key, model_json)
    if category == "AutoEncoder":
        return H2OAutoEncoderModel(future_model.job.dest_key, model_json)
    if category == "DimReduction":
        return H2ODimReductionModel(future_model.job.dest_key, model_json)
    raise NotImplementedError(category)
Esempio n. 6
0
def _resolve_model(future_model, **kwargs):
    """
    Wait for a model-building job and return the model wrapped in its category class.

    :param future_model: object exposing ``poll()`` and ``job.dest_key``.
    :param kwargs: only ``_rest_version`` is read; when present it is passed on to the
      "Models/<key>" request.
    :return: a model object chosen from the model's ``output.model_category``.
    :raises NotImplementedError: for a model category not handled here.
    """
    future_model.poll()

    # Forward _rest_version only when the caller supplied it.
    request_options = {}
    if '_rest_version' in kwargs:
        request_options['_rest_version'] = kwargs['_rest_version']
    reply = H2OConnection.get_json("Models/" + future_model.job.dest_key, **request_options)
    model_json = reply["models"][0]

    kind = model_json["output"]["model_category"]
    if kind == "Binomial":
        return H2OBinomialModel(future_model.job.dest_key, model_json)
    if kind == "Clustering":
        return H2OClusteringModel(future_model.job.dest_key, model_json)
    if kind == "Regression":
        return H2ORegressionModel(future_model.job.dest_key, model_json)
    if kind == "Multinomial":
        return H2OMultinomialModel(future_model.job.dest_key, model_json)
    if kind == "AutoEncoder":
        return H2OAutoEncoderModel(future_model.job.dest_key, model_json)
    if kind == "DimReduction":
        return H2ODimReductionModel(future_model.job.dest_key, model_json)
    raise NotImplementedError(kind)
Esempio n. 7
0
def download_all_logs(dirname=".",filename=None):
  """
  Download the H2O logs from the connected node and write them to disk.

  :param dirname: (Optional) A character string indicating the directory that the log file should be saved in.
    The directory is created if it does not exist.
  :param filename: (Optional) A string indicating the name of the file to write. When omitted, the name is
    taken from the ``filename=`` part of the response headers.
  :return: path of logs written (as a string)
  :raises ValueError: if no ``filename`` is given and none can be found in the response headers.
  """
  url = 'http://' + H2OConnection.ip() + ':' + str(H2OConnection.port()) + '/Logs/download'
  response = urllib2.urlopen(url)
  try:
    if not os.path.exists(dirname): os.mkdir(dirname)
    if filename is None:
      for h in response.headers.headers:
        if 'filename=' in h:
          filename = h.split("filename=")[1].strip()
          break
    if filename is None:
      # Fail with a clear message instead of an obscure error from os.path.join.
      raise ValueError("Could not determine the log file name from the response headers; "
                       "pass `filename` explicitly")
    path = os.path.join(dirname,filename)

    # Reuse the response already opened above rather than downloading the logs a
    # second time, and write in binary mode so the payload is stored byte-for-byte
    # (text mode would translate line endings on some platforms).
    with open(path, 'wb') as f:
      f.write(response.read())
  finally:
    response.close()

  print("Writing H2O logs to " + path)
  return path
Esempio n. 8
0
File: h2o.py Progetto: yuecong/h2o-3
def remove_all():
  """
  Remove all objects from H2O by sending a delete request to the "DKV" endpoint.

  :return: None
  """
  endpoint = "DKV"
  H2OConnection.delete(endpoint)
Esempio n. 9
0
def cluster_info():
    """
    Display the current H2O cluster information.

    Delegates entirely to ``H2OConnection._cluster_info()``; its result is discarded.

    :return: None
    """
    show_info = H2OConnection._cluster_info
    show_info()
Esempio n. 10
0
File: h2o.py Progetto: yuecong/h2o-3
def cluster_info():
  """
  Display the current H2O cluster information.

  Delegates entirely to ``H2OConnection._cluster_info()``; its result is discarded.

  :return: None
  """
  show_info = H2OConnection._cluster_info
  show_info()
def _model_build(x,y,validation_x,validation_y,algo_url,kwargs):
  """
  Send the training data to H2O, run a model builder and return the resulting model.

  The frames sent by this function are removed again before it returns, whether the
  build succeeds or fails.

  :param x: training features; a falsy value raises ValueError.
  :param y: response; must be falsy when the autoencoder option is switched on.
  :param validation_x: optional validation features.
  :param validation_y: optional validation response.
  :param algo_url: model builder name, appended to "ModelBuilders/". "autoencoder" is
    rewritten to "deeplearning" when kwargs["autoencoder"] is truthy.
  :param kwargs: dict of builder parameters. Frame and response entries are added to it
    in place; entries whose value is None are not sent.
  :return: a model object selected by the model's "model_category".
  :raises ValueError: if features are missing, or `y` is given for an autoencoder.
  :raises NotImplementedError: for a model category not handled here (the category is the message).
  """
  # Basic sanity checking
  if algo_url == "autoencoder" and kwargs.get("autoencoder"):
    if y:
      raise ValueError("`y` should not be specified for autoencoder, remove `y` input.")
    algo_url="deeplearning"
  if not x:  raise ValueError("Missing features")
  x = _check_frame(x,y,y)
  if validation_x:
    validation_x = _check_frame(validation_x,validation_y,y)

  # Send frame descriptions to H2O cluster
  train_key = x.send_frame()
  # Stays None unless a validation frame is actually sent; cleanup keys off this so
  # that "sent" and "removed" can never disagree.
  valid_key = None
  try:
    kwargs['training_frame']=train_key
    if validation_x is not None:
      valid_key = validation_x.send_frame()
      kwargs['validation_frame']=valid_key

    if y:
      kwargs['response_column']=y._name

    kwargs = dict((k, kwargs[k]) for k in kwargs if kwargs[k] is not None)

    # launch the job and poll
    job = H2OJob(H2OConnection.post_json("ModelBuilders/"+algo_url, **kwargs), job_type=(algo_url+" Model Build")).poll()
    model_json = H2OConnection.get_json("Models/"+job.dest_key)["models"][0]
    model_type = model_json["output"]["model_category"]
    if model_type=="Binomial":
      from model.binomial import H2OBinomialModel
      model = H2OBinomialModel(job.dest_key,model_json)

    elif model_type=="Clustering":
      from model.clustering import H2OClusteringModel
      model = H2OClusteringModel(job.dest_key,model_json)

    elif model_type=="Regression":
      from model.regression import H2ORegressionModel
      model = H2ORegressionModel(job.dest_key,model_json)

    elif model_type=="Multinomial":
      from model.multinomial import H2OMultinomialModel
      model = H2OMultinomialModel(job.dest_key,model_json)

    elif model_type=="AutoEncoder":
      from model.autoencoder import H2OAutoEncoderModel
      model = H2OAutoEncoderModel(job.dest_key,model_json)

    else:
      # Carry the offending category in the exception instead of printing it.
      raise NotImplementedError(model_type)
  finally:
    # Cleanup
    h2o.remove(train_key)
    if valid_key is not None:
      h2o.remove(valid_key)

  return model
Esempio n. 12
0
def _model_build(x,y,validation_x,validation_y,algo_url,kwargs):
  """
  Send the training data to H2O, run a model builder and return the resulting model.

  The frames sent by this function are deleted again before it returns, whether the
  build succeeds or fails.

  :param x: training features; a falsy value raises ValueError.
  :param y: response; must be falsy when the autoencoder option is switched on.
  :param validation_x: optional validation features.
  :param validation_y: optional validation response.
  :param algo_url: model builder name, appended to "ModelBuilders/". "autoencoder" is
    rewritten to "deeplearning" when kwargs["autoencoder"] is truthy.
  :param kwargs: dict of builder parameters. Frame and response entries are added to it
    in place; entries whose value is None are not sent.
  :return: a model object selected by the model's "model_category".
  :raises ValueError: if features are missing, or `y` is given for an autoencoder.
  :raises NotImplementedError: for a model category not handled here (the category is the message).
  """
  # Basic sanity checking
  if algo_url == "autoencoder" and kwargs.get("autoencoder"):
    if y:
      raise ValueError("`y` should not be specified for autoencoder, remove `y` input.")
    algo_url="deeplearning"
  if not x:  raise ValueError("Missing features")
  x = _check_frame(x,y,y)
  if validation_x:
    validation_x = _check_frame(validation_x,validation_y,y)

  # Send frame descriptions to H2O cluster
  train_key = x.send_frame()
  # Stays None unless a validation frame is actually sent; cleanup keys off this so
  # that "sent" and "deleted" can never disagree.
  valid_key = None
  try:
    kwargs['training_frame']=train_key
    if validation_x is not None:
      valid_key = validation_x.send_frame()
      kwargs['validation_frame']=valid_key

    if y:
      kwargs['response_column']=y._name

    kwargs = dict((k, kwargs[k]) for k in kwargs if kwargs[k] is not None)

    # launch the job and poll
    job = H2OJob(H2OConnection.post_json("ModelBuilders/"+algo_url, **kwargs), job_type=(algo_url+" Model Build")).poll()
    model_json = H2OConnection.get_json("Models/"+job.dest_key)["models"][0]
    model_type = model_json["output"]["model_category"]
    if model_type=="Binomial":
      from model.binomial import H2OBinomialModel
      model = H2OBinomialModel(job.dest_key,model_json)

    elif model_type=="Clustering":
      from model.clustering import H2OClusteringModel
      model = H2OClusteringModel(job.dest_key,model_json)

    elif model_type=="Regression":
      from model.regression import H2ORegressionModel
      model = H2ORegressionModel(job.dest_key,model_json)

    elif model_type=="Multinomial":
      from model.multinomial import H2OMultinomialModel
      model = H2OMultinomialModel(job.dest_key,model_json)

    elif model_type=="AutoEncoder":
      from model.autoencoder import H2OAutoEncoderModel
      model = H2OAutoEncoderModel(job.dest_key,model_json)

    else:
      # Carry the offending category in the exception instead of printing it.
      raise NotImplementedError(model_type)
  finally:
    # Cleanup
    h2o.delete(train_key)
    if valid_key is not None:
      h2o.delete(valid_key)

  return model
Esempio n. 13
0
def remove(key):
  """
  Remove a key from H2O by sending a delete request to the "Remove" endpoint.

  :param key: The key pointing to the object to be removed; sent as the `key` keyword argument.
  :return: None
  """
  request_options = {"key": key}
  H2OConnection.delete("Remove", **request_options)
Esempio n. 14
0
 def _upload_raw_data(self, tmp_file_path, column_names):
   """
   Post a local file to H2O under a temporary key and hand that key to the text-key handler.

   :param tmp_file_path: path to a local file; it is made absolute before posting.
   :param column_names: passed through to ``self._handle_text_key``.
   :return: None
   """
   # Absolute path of the local file, in the dict shape the PostFile request is given.
   upload_info = {"file": os.path.abspath(tmp_file_path)}
   # Temporary name under which the data is stored.
   frame_key = H2OFrame.py_tmp_key()
   H2OConnection.post_json("PostFile", upload_info, destination_frame=frame_key)
   # Parsing and frame setup happen in the handler.
   self._handle_text_key(frame_key, column_names)
Esempio n. 15
0
def export_file(frame,path,force=False):
  """
  Ask H2O to export a frame to `path` (per the original docs: a path on the machine this
  session is connected to; see h2o.cluster_info() for the current session).

  :param frame: The Frame to save to disk; its `_id` is used in the request URL.
  :param path: The path to the save point on disk.
  :param force: Overwrite any preexisting file with the same path
  :return: None
  """
  overwrite_flag = "true" if force else "false"
  request_url = "Frames/"+frame._id+"/export/"+path+"/overwrite/"+overwrite_flag
  H2OConnection.get_json(request_url)
Esempio n. 16
0
def shutdown(conn=None, prompt=True):
  """
  Shut down the specified instance. All data will be lost.

  The actual work is delegated to ``H2OConnection._shutdown``.

  :param conn: An H2OConnection object containing the IP address and port of the server running H2O.
    When None, the current connection is used.
  :param prompt: A logical value indicating whether to prompt the user before shutting down the H2O server.
  :return: None
  """
  # Identity test: `== None` could be intercepted by a custom __eq__ on the connection.
  if conn is None: conn = H2OConnection.current_connection()
  H2OConnection._shutdown(conn=conn, prompt=prompt)
Esempio n. 17
0
File: h2o.py Progetto: yuecong/h2o-3
def _as_data_frame(id, use_pandas):
  """
  Download a frame through the DownloadDataset endpoint and parse it as CSV.

  :param id: frame id; URL-quoted before being placed in the query string.
  :param use_pandas: when truthy, parse with ``pandas.read_csv``; otherwise with ``csv.reader``.
  :return: a pandas DataFrame, or a list of rows (each row a list of strings).
  """
  base = 'http://' + H2OConnection.ip() + ':' + str(H2OConnection.port())
  query = "/3/DownloadDataset?frame_id=" + urllib.quote(id) + "&hex_string=false"
  response = urllib2.urlopen(base + query)
  if not use_pandas:
    return list(csv.reader(response))
  # pandas is imported only when it is actually requested.
  import pandas
  return pandas.read_csv(response, low_memory=False)
Esempio n. 18
0
def remove(key):
  """
  Remove key from H2O by sending a delete request to "DKV/<key>".

  :param key: The key pointing to the object to be removed; must not be None.
  :return: None
  :raises ValueError: if `key` is None.
  """
  if key is not None:
    H2OConnection.delete("DKV/" + key)
    return
  raise ValueError("remove with no key is not supported, for your protection")
Esempio n. 19
0
File: h2o.py Progetto: moidin/h2o-3
def shutdown(conn=None, prompt=True):
  """
  Shut down the specified instance. All data will be lost.

  The actual work is delegated to ``H2OConnection._shutdown``.

  :param conn: An H2OConnection object containing the IP address and port of the server running H2O.
    When None, the current connection is used.
  :param prompt: A logical value indicating whether to prompt the user before shutting down the H2O server.
  :return: None
  """
  # Identity test: `== None` could be intercepted by a custom __eq__ on the connection.
  if conn is None: conn = H2OConnection.current_connection()
  H2OConnection._shutdown(conn=conn, prompt=prompt)
Esempio n. 20
0
def remove(key):
  """
  Remove key from H2O by sending a delete request to "DKV/<key>".

  :param key: The key pointing to the object to be removed; must not be None.
  :return: None
  :raises ValueError: if `key` is None.
  """
  if key is not None:
    H2OConnection.delete("DKV/" + key)
    return
  raise ValueError("remove with no key is not supported, for your protection")
Esempio n. 21
0
File: h2o.py Progetto: moidin/h2o-3
def remove(object):
  """
  Remove object from H2O. Per the original docs this is a "hard" delete that removes all subparts.

  :param object: An H2OFrame (its `_id` is used) or a string key.
  :return: None
  :raises ValueError: if `object` is None.
  """
  if object is None:
    raise ValueError("remove with no object is not supported, for your protection")

  # The two type checks are independent; any other type falls through without a request.
  if isinstance(object, H2OFrame):
    frame_url = "DKV/"+object._id
    H2OConnection.delete(frame_url)
  if isinstance(object, str):
    key_url = "DKV/"+object
    H2OConnection.delete(key_url)
Esempio n. 22
0
File: h2o.py Progetto: yuecong/h2o-3
def upload_file(path, destination_frame=""):
  """
  Upload a dataset at the path given from the local machine to the H2O cluster.

  :param path: A path specifying the location of the data to upload; made absolute before posting.
  :param destination_frame: The name of the H2O Frame in the H2O Cluster. An empty string
    selects a temporary key from ``H2OFrame.py_tmp_key()``.
  :return: A new H2OFrame
  """
  local_file = {"file": os.path.abspath(path)}
  if destination_frame == "":
    destination_frame = H2OFrame.py_tmp_key()
  H2OConnection.post_json(url_suffix="PostFile", file_upload_info=local_file, destination_frame=destination_frame)
  return H2OFrame(text_key=destination_frame)
Esempio n. 23
0
def upload_file(path, destination_key=""):
  """
  Upload a dataset at the path given from the local machine to the H2O cluster.

  :param path: A path specifying the location of the data to upload; made absolute before posting.
  :param destination_key: The name of the H2O Frame in the H2O Cluster. An empty string
    selects a temporary key from ``H2OFrame.py_tmp_key()``.
  :return: A new H2OFrame
  """
  local_file = {"file": os.path.abspath(path)}
  if destination_key == "":
    target_key = H2OFrame.py_tmp_key()
  else:
    target_key = destination_key
  H2OConnection.post_json(url_suffix="PostFile", file_upload_info=local_file, destination_key=target_key)
  return H2OFrame(text_key=target_key)
Esempio n. 24
0
def remove(object):
  """
  Remove object from H2O. Per the original docs this is a "hard" delete that removes all subparts.

  :param object: An H2OFrame (its `_id` is used) or a string key.
  :return: None
  :raises ValueError: if `object` is None.
  """
  if object is None:
    raise ValueError("remove with no object is not supported, for your protection")

  # The two type checks are independent; any other type falls through without a request.
  if isinstance(object, H2OFrame):
    frame_url = "DKV/"+object._id
    H2OConnection.delete(frame_url)
  if isinstance(object, str):
    key_url = "DKV/"+object
    H2OConnection.delete(key_url)
Esempio n. 25
0
def export_file(frame, path, force=False):
    """
    Ask H2O to export a frame to `path` (per the original docs: a path on the machine this
    session is connected to; see h2o.cluster_info() for the current session).

    :param frame: The Frame to save to disk; it is first passed to ``H2OFrame.send_frame``.
    :param path: The path to the save point on disk.
    :param force: Overwrite any preexisting file with the same path
    :return: None
    """
    sent_frame = H2OFrame.send_frame(frame)
    if force:
        overwrite_flag = "true"
    else:
        overwrite_flag = "false"
    request_url = "Frames/" + str(sent_frame) + "/export/" + path + "/overwrite/" + overwrite_flag
    H2OConnection.get_json(request_url)
Esempio n. 26
0
File: h2o.py Progetto: yuecong/h2o-3
def log_and_echo(message):
  """
  Send a message to H2O for logging by posting it to the "LogAndEcho" endpoint.

  Per the original docs this is useful for marking, in the server-side log, where one
  piece of work ends and the next begins. Generally used for debugging purposes.

  :param message: A character string with the message to write to the log; None is sent as "".
  :return: None
  """
  text = "" if message is None else message
  H2OConnection.post_json("LogAndEcho", message=text)
Esempio n. 27
0
def _as_data_frame(id, use_pandas):
    """
    Download a frame through the DownloadDataset endpoint and parse it as CSV.

    :param id: frame id; URL-quoted before being placed in the query string.
    :param use_pandas: when truthy, parse with ``pandas.read_csv``; otherwise with ``csv.reader``.
    :return: a pandas DataFrame, or a list of rows (each row a list of strings).
    """
    base = 'http://' + H2OConnection.ip() + ':' + str(H2OConnection.port())
    query = "/3/DownloadDataset?frame_id=" + urllib.quote(id) + "&hex_string=false"
    response = urllib2.urlopen(base + query)
    if not use_pandas:
        return list(csv.reader(response))
    # pandas is imported only when it is actually requested.
    import pandas
    return pandas.read_csv(response, low_memory=False)
Esempio n. 28
0
def log_and_echo(message):
    """
    Send a message to H2O for logging by posting it to the "LogAndEcho" endpoint.

    Per the original docs this is useful for marking, in the server-side log, where one
    piece of work ends and the next begins. Generally used for debugging purposes.

    :param message: A character string with the message to write to the log; None is sent as "".
    :return: None
    """
    text = "" if message is None else message
    H2OConnection.post_json("LogAndEcho", message=text)
Esempio n. 29
0
def rapids(expr):
  """
  Post a Rapids expression to the "Rapids" endpoint.

  :param expr: The rapids expression (ascii string); it is URL-quoted and sent as the "ast" parameter.
  :return: whatever ``H2OConnection.post_json`` returns (per the original docs, the JSON response).
  """
  quoted_ast = urllib.quote(expr)
  return H2OConnection.post_json(url_suffix="Rapids", params={"ast": quoted_ast})
Esempio n. 30
0
    def _upload_raw_data(self, tmp_file_path, column_names):
        """
        Post a local file to H2O under a temporary key and hand that key to the raw-file handler.

        :param tmp_file_path: path to a local file; it is made absolute before posting.
        :param column_names: passed through to ``self._handle_raw_fname``.
        :return: None
        """
        # Absolute path of the local file, in the dict shape the PostFile request is given.
        upload_info = {"file": os.path.abspath(tmp_file_path)}

        # Temporary name under which the data is stored; it is the only URL parameter.
        frame_key = H2OFrame.py_tmp_key()
        url_params = {'destination_key': frame_key}

        H2OConnection.post_json(url_suffix="PostFile", params=url_params, file_upload_info=upload_info)

        # Parsing and frame setup happen in the handler.
        self._handle_raw_fname(frame_key, column_names=column_names)
Esempio n. 31
0
def cluster_status():
  """
  Print version, cloud name/size, lock state and one status line per node, as reported by
  the "Cloud?skip_ticks=true" endpoint.

  TODO: This isn't really a cluster status... it's a node status check for the node we're connected to.
  This is possibly confusing because this can come back without warning,
  but if a user tries to do any remoteSend, they will get a "cloud sick warning"

  :return: None
  """
  cluster_json = H2OConnection.get_json("Cloud?skip_ticks=true")

  print("Version: {0}".format(cluster_json['version']))
  print("Cloud name: {0}".format(cluster_json['cloud_name']))
  print("Cloud size: {0}".format(cluster_json['cloud_size']))
  if cluster_json['locked']: print("Cloud is locked\n")
  else: print("Accepting new members\n")

  nodes = cluster_json['nodes']
  if nodes is None or len(nodes) == 0:
    print("No nodes found")
    return

  reported_keys = ("h2o", "healthy", "last_ping", "num_cpus", "sys_load", "mem_value_size", "total_value_size",
                   "free_mem", "tot_mem", "max_mem", "free_disk", "max_disk", "pid", "num_keys", "tcps_active",
                   "open_fds", "rpcs_active")
  for node in nodes:
    # Build a fresh list for every node; a single shared list would repeat the fields of
    # all previously printed nodes on each later line.
    status = [k+": {0}".format(v) for k, v in node.items() if k in reported_keys]
    print(', '.join(status))
    print("")
Esempio n. 32
0
def _model_build(x, y, vx, vy, algo, offsets, weights, fold_column, kwargs):
    """
    Prepare the frames and parameters for a model builder, launch it, and return either
    the pending future or the resolved model.

    :param x: training features; None raises ValueError.
    :param y: response; when not None its first column name becomes "response_column".
    :param vx: validation features, passed through ``_check_frame``.
    :param vy: validation response.
    :param algo: model builder name, appended to "ModelBuilders/".
    :param offsets: optional column, merged via ``_check_col``.
    :param weights: optional column, merged via ``_check_col``.
    :param fold_column: optional column, merged via ``_check_col``.
    :param kwargs: dict of builder parameters. Frame/response entries are added to it in
      place; None values are dropped and H2OFrame values are replaced by an id before sending.
      A truthy "do_future" entry makes the function return the future instead of the model.
    :return: an H2OModelFuture when "do_future" is truthy, else the result of ``_resolve_model``.
    :raises ValueError: if `x` is None.
    """
    if x is None:
        raise ValueError("Missing features")
    x = _check_frame(x, y, y)
    vx = _check_frame(vx, vy, y)
    # Same order as the signature: offsets, then weights, then fold_column.
    for extra_column in (offsets, weights, fold_column):
        if extra_column is not None:
            x, vx = _check_col(x, vx, kwargs["validation_frame"], extra_column)

    kwargs["training_frame"] = x._id
    if vx is not None:
        kwargs["validation_frame"] = vx._id
    if y is not None:
        kwargs["response_column"] = y._col_names[0]

    # Build the outgoing parameters in a new dict; the caller's dict is not touched further.
    params = {}
    for k in kwargs:
        value = kwargs[k]
        if value is None:
            continue
        params[k] = value._frame()._id if isinstance(value, H2OFrame) else value

    do_future = params.pop("do_future", False)
    job = H2OJob(H2OConnection.post_json("ModelBuilders/" + algo, **params), job_type=(algo + " Model Build"))
    future_model = H2OModelFuture(job, x)
    if do_future:
        return future_model
    return _resolve_model(future_model, **params)
Esempio n. 33
0
File: h2o.py Progetto: yuecong/h2o-3
def frames():
  """
  Retrieve all the Frames by querying the "Frames" endpoint.

  :return: whatever ``H2OConnection.get_json`` returns (per the original docs, meta information on the frames)
  """
  all_frames = H2OConnection.get_json("Frames")
  return all_frames
Esempio n. 34
0
def interaction(data, factors, pairwise, max_factors, min_occurrence, destination_frame=None):
  """
  Categorical Interaction Feature Creation in H2O.
  Posts an "Interaction" job, waits for it, and returns the frame stored under the destination key.

  :param data: the H2OFrame that holds the target categorical columns.
  :param factors: Factor columns (either indices or column names); indices are translated to names.
  :param pairwise: Whether to create pairwise interactions between factors (otherwise create one
  higher-order interaction). Only applicable if there are 3 or more factors.
  :param max_factors: Max. number of factor levels in pair-wise interaction terms (if enforced, one extra catch-all
  factor will be made)
  :param min_occurrence: Min. occurrence threshold for factor levels in pair-wise interaction terms
  :param destination_frame: A string indicating the destination key. When None, a temporary key
  from ``_py_tmp_key()`` is used.
  :return: H2OFrame
  """
  data._eager()
  # Integer entries are positions into data.names(); everything else is taken as a name already.
  factor_names = []
  for n in factors:
    factor_names.append(data.names()[n] if isinstance(n,int) else n)

  dest = destination_frame if destination_frame is not None else _py_tmp_key()
  response = H2OConnection.post_json("Interaction",
                                     dest=dest,
                                     source_frame=data._id,
                                     factor_columns=[_quoted(f) for f in factor_names],
                                     pairwise=pairwise,
                                     max_factors=max_factors,
                                     min_occurrence=min_occurrence)
  H2OJob(response, "Interactions").poll()
  return get_frame(dest)
Esempio n. 35
0
File: h2o.py Progetto: moidin/h2o-3
def interaction(data, factors, pairwise, max_factors, min_occurrence, destination_frame=None):
  """
  Categorical Interaction Feature Creation in H2O.
  Posts an "Interaction" job, waits for it, and returns the frame stored under the destination key.

  :param data: the H2OFrame that holds the target categorical columns.
  :param factors: Factor columns (either indices or column names); indices are translated to names.
  :param pairwise: Whether to create pairwise interactions between factors (otherwise create one
  higher-order interaction). Only applicable if there are 3 or more factors.
  :param max_factors: Max. number of factor levels in pair-wise interaction terms (if enforced, one extra catch-all
  factor will be made)
  :param min_occurrence: Min. occurrence threshold for factor levels in pair-wise interaction terms
  :param destination_frame: A string indicating the destination key. When None, a temporary key
  from ``_py_tmp_key()`` is used.
  :return: H2OFrame
  """
  data._eager()
  # Integer entries are positions into data.names(); everything else is taken as a name already.
  factor_names = []
  for n in factors:
    factor_names.append(data.names()[n] if isinstance(n,int) else n)

  dest = destination_frame if destination_frame is not None else _py_tmp_key()
  response = H2OConnection.post_json("Interaction",
                                     dest=dest,
                                     source_frame=data._id,
                                     factor_columns=[_quoted(f) for f in factor_names],
                                     pairwise=pairwise,
                                     max_factors=max_factors,
                                     min_occurrence=min_occurrence)
  H2OJob(response, "Interactions").poll()
  return get_frame(dest)
Esempio n. 36
0
def frame(key):
  """
  Retrieve metadata for a key that points to a Frame by querying "Frames/<key>".

  :param key: A pointer to a Frame in H2O (string; it is concatenated into the URL suffix).
  :return: whatever ``H2OConnection.get_json`` returns (per the original docs, meta information on the Frame).
  """
  suffix = "Frames/" + key
  return H2OConnection.get_json(url_suffix=suffix)
Esempio n. 37
0
def _model_build(x, y, vx, vy, algo, offsets, weights, fold_column, kwargs):
    """
    Prepare the frames and parameters for a model builder, launch it, and return either
    the pending future or the resolved model.

    :param x: training features; None raises ValueError.
    :param y: response; when not None its first name becomes "response_column".
    :param vx: validation features, passed through ``_check_frame``.
    :param vy: validation response.
    :param algo: model builder name, appended to "ModelBuilders/".
    :param offsets: optional column, merged via ``_check_col``.
    :param weights: optional column, merged via ``_check_col``.
    :param fold_column: optional column, merged via ``_check_col``.
    :param kwargs: dict of builder parameters. Frame/response entries are added to it in
      place; None values are dropped and H2OFrame values are replaced by their frame_id before
      sending. A truthy "do_future" entry makes the function return the future instead of the model.
    :return: an H2OModelFuture when "do_future" is truthy, else the result of ``_resolve_model``.
    :raises ValueError: if `x` is None.
    """
    if x is None:
        raise ValueError("Missing features")
    x = _check_frame(x, y, y)
    vx = _check_frame(vx, vy, y)
    # Same order as the signature: offsets, then weights, then fold_column.
    for extra_column in (offsets, weights, fold_column):
        if extra_column is not None:
            x, vx = _check_col(x, vx, kwargs["validation_frame"], extra_column)

    kwargs['training_frame'] = x.frame_id
    if vx is not None:
        kwargs['validation_frame'] = vx.frame_id
    if y is not None:
        kwargs['response_column'] = y.names[0]

    # Build the outgoing parameters in a new dict; the caller's dict is not touched further.
    params = {}
    for k in kwargs:
        value = kwargs[k]
        if value is None:
            continue
        params[k] = value.frame_id if isinstance(value, H2OFrame) else value

    do_future = params.pop("do_future", False)
    job = H2OJob(H2OConnection.post_json("ModelBuilders/" + algo, **params),
                 job_type=(algo + " Model Build"))
    future_model = H2OModelFuture(job, x)
    if do_future:
        return future_model
    return _resolve_model(future_model, **params)
Esempio n. 38
0
def frames():
    """
    Retrieve all the Frames by querying the "Frames" endpoint.

    :return: whatever ``H2OConnection.get_json`` returns (per the original docs, meta information on the frames)
    """
    all_frames = H2OConnection.get_json("Frames")
    return all_frames
Esempio n. 39
0
File: h2o.py Progetto: yuecong/h2o-3
def get_model(model_id):
  """
  Return the specified model, wrapped in the class matching its model category.

  :param model_id: The model identification in h2o (string; appended to "Models/").
  :return: a model object selected by the model's "model_category".
  :raises NotImplementedError: for a model category not handled here (the category is the message).
  """
  model_json = H2OConnection.get_json("Models/"+model_id)["models"][0]
  model_type = model_json["output"]["model_category"]
  # Each wrapper class is imported only in the branch that needs it.
  if model_type=="Binomial":
    from model.binomial import H2OBinomialModel
    return H2OBinomialModel(model_id, model_json)

  if model_type=="Clustering":
    from model.clustering import H2OClusteringModel
    return H2OClusteringModel(model_id, model_json)

  if model_type=="Regression":
    from model.regression import H2ORegressionModel
    return H2ORegressionModel(model_id, model_json)

  if model_type=="Multinomial":
    from model.multinomial import H2OMultinomialModel
    return H2OMultinomialModel(model_id, model_json)

  if model_type=="AutoEncoder":
    from model.autoencoder import H2OAutoEncoderModel
    return H2OAutoEncoderModel(model_id, model_json)

  # Carry the offending category in the exception instead of printing it.
  raise NotImplementedError(model_type)
Esempio n. 40
0
def init(ip="localhost",
         port=54321,
         size=1,
         start_h2o=False,
         enable_assertions=False,
         license=None,
         max_mem_size_GB=None,
         min_mem_size_GB=None,
         ice_root=None,
         strict_version_check=True):
    """
    Initiate an H2O connection to the specified ip and port by constructing an H2OConnection.

    Parameter descriptions follow the original documentation; every argument except `size`
    is forwarded unchanged to ``H2OConnection``.

    :param ip: An IP address, default is "localhost"
    :param port: A port, default is 54321
    :param size: The expected number of h2o instances (ignored if start_h2o is True).
      NOTE(review): this value is not forwarded to H2OConnection here -- confirm whether that is intended.
    :param start_h2o: A boolean dictating whether this module should start the H2O jvm.
    :param enable_assertions: If start_h2o, pass `-ea` as a VM option.
    :param license: If not None, is a path to a license file.
    :param max_mem_size_GB: Maximum heap size (jvm option Xmx) in gigabytes.
    :param min_mem_size_GB: Minimum heap size (jvm option Xms) in gigabytes.
    :param ice_root: A temporary directory to hold H2O log files.
    :param strict_version_check: forwarded to H2OConnection; default is True.
    :return: None
    """
    connection_options = dict(
        ip=ip,
        port=port,
        start_h2o=start_h2o,
        enable_assertions=enable_assertions,
        license=license,
        max_mem_size_GB=max_mem_size_GB,
        min_mem_size_GB=min_mem_size_GB,
        ice_root=ice_root,
        strict_version_check=strict_version_check,
    )
    # The connection object is created for its side effect; it is not returned.
    H2OConnection(**connection_options)
    return None
Esempio n. 41
0
def get_model(model_id):
    """
    Return the specified model, wrapped in the class matching its model category.

    :param model_id: The model identification in h2o (string; appended to "Models/").
    :return: a model object selected by the model's "model_category".
    :raises NotImplementedError: for a model category not handled here (the category is the message).
    """
    model_json = H2OConnection.get_json("Models/" + model_id)["models"][0]
    model_type = model_json["output"]["model_category"]
    # Each wrapper class is imported only in the branch that needs it.
    if model_type == "Binomial":
        from model.binomial import H2OBinomialModel
        return H2OBinomialModel(model_id, model_json)

    if model_type == "Clustering":
        from model.clustering import H2OClusteringModel
        return H2OClusteringModel(model_id, model_json)

    if model_type == "Regression":
        from model.regression import H2ORegressionModel
        return H2ORegressionModel(model_id, model_json)

    if model_type == "Multinomial":
        from model.multinomial import H2OMultinomialModel
        return H2OMultinomialModel(model_id, model_json)

    if model_type == "AutoEncoder":
        from model.autoencoder import H2OAutoEncoderModel
        return H2OAutoEncoderModel(model_id, model_json)

    # Carry the offending category in the exception instead of printing it.
    raise NotImplementedError(model_type)
Esempio n. 42
0
def cluster_status():
    """
    Print version, cloud name/size, lock state and one status line per node, as reported by
    the "Cloud?skip_ticks=true" endpoint.

    TODO: This isn't really a cluster status... it's a node status check for the node we're connected to.
    This is possibly confusing because this can come back without warning,
    but if a user tries to do any remoteSend, they will get a "cloud sick warning"

    :return: None
    """
    cluster_json = H2OConnection.get_json("Cloud?skip_ticks=true")

    print("Version: {0}".format(cluster_json['version']))
    print("Cloud name: {0}".format(cluster_json['cloud_name']))
    print("Cloud size: {0}".format(cluster_json['cloud_size']))
    if cluster_json['locked']: print("Cloud is locked\n")
    else: print("Accepting new members\n")

    nodes = cluster_json['nodes']
    if nodes is None or len(nodes) == 0:
        print("No nodes found")
        return

    reported_keys = (
        "h2o", "healthy", "last_ping", "num_cpus", "sys_load",
        "mem_value_size", "total_value_size", "free_mem",
        "tot_mem", "max_mem", "free_disk", "max_disk", "pid",
        "num_keys", "tcps_active", "open_fds", "rpcs_active"
    )
    for node in nodes:
        # Build a fresh list for every node; a single shared list would repeat the fields
        # of all previously printed nodes on each later line.
        status = [k + ": {0}".format(v) for k, v in node.items() if k in reported_keys]
        print(', '.join(status))
        print("")
Esempio n. 43
0
def _model_build(x,y,validation_x,validation_y,algo_url,kwargs):
  # Basic sanity checking
  if algo_url == "autoencoder":
    if "autoencoder" in kwargs.keys():
      if kwargs["autoencoder"]:
        if y:
          raise ValueError("`y` should not be specified for autoencoder, remove `y` input.")
        algo_url="deeplearning"
  if not x:  raise ValueError("Missing features")
  x = _check_frame(x,y,y)
  if validation_x is not None: validation_x = _check_frame(validation_x,validation_y,y)

  if "weights_column" in kwargs.keys(): x, validation_x = _add_col_to_x_and_validation_x(kwargs["weights_column"],x, validation_x, kwargs)
  if "offset_column"  in kwargs.keys(): x, validation_x = _add_col_to_x_and_validation_x(kwargs["offset_column"], x, validation_x, kwargs)
  if "fold_column"   in kwargs.keys(): x, validation_x = _add_col_to_x_and_validation_x(kwargs["fold_column"],    x, validation_x, kwargs, xval=True)

  # Send frame descriptions to H2O cluster
  kwargs['training_frame']=x._id
  if validation_x is not None: kwargs['validation_frame']=validation_x._id

  if y is not None: kwargs['response_column']=y._col_names[0]

  kwargs = dict([(k, kwargs[k]._frame()._id if isinstance(kwargs[k], H2OFrame) else kwargs[k]) for k in kwargs if
                 kwargs[k] is not None])

  # launch the job (only resolve the model if do_future is False)
  do_future = "do_future" in kwargs.keys() and kwargs["do_future"]
  if "do_future" in kwargs.keys(): kwargs.pop("do_future")
  future_model = H2OModelFuture(H2OJob(H2OConnection.post_json("ModelBuilders/"+algo_url, **kwargs),
                                       job_type=(algo_url+" Model Build")), x)
  if do_future: return future_model
  else: return _resolve_model(future_model, **kwargs)
Esempio n. 44
0
File: h2o.py Progetto: moidin/h2o-3
def download_csv(data, filename):
  """
  Download an H2O data set to a CSV file on the local disk.
  Warning: Files located on the H2O server may be very large! Make
  sure you have enough hard drive space to accommodate the entire file.

  :param data: an H2OFrame object to be downloaded.
  :param filename: A string indicating the path the CSV file should be saved to.
  :return: None
  :raises ValueError: if `data` is not an H2OFrame.
  """
  if not isinstance(data, H2OFrame):
    # Format the type into the message; concatenating str + type raises TypeError instead.
    raise ValueError("`data` argument must be an H2OFrame, but got {0}".format(type(data)))
  url = "http://{}:{}/3/DownloadDataset?frame_id={}".format(H2OConnection.ip(), H2OConnection.port(), data._id)
  # Open the connection first so a failed request does not leave a truncated local file.
  response = urllib2.urlopen(url)
  try:
    # Binary mode: the payload is written exactly as received.
    with open(filename, 'wb') as f:
      # Stream in fixed-size chunks instead of holding the whole file in memory.
      for chunk in iter(lambda: response.read(65536), b""):
        f.write(chunk)
  finally:
    response.close()
Esempio n. 45
0
File: h2o.py Progetto: yuecong/h2o-3
def frame(frame_id):
  """
  Fetch the metadata H2O holds for a Frame.

  :param frame_id: The id of a Frame in H2O; it is URL-quoted before use.
  :return: The JSON returned by the "Frames/<frame_id>" endpoint.
  """
  endpoint = "Frames/" + urllib.quote(frame_id)
  return H2OConnection.get_json(endpoint)
Esempio n. 46
0
def frame(frame_id):
    """
    Look up Frame metadata by id.

    :param frame_id: The id of a Frame in H2O; it is URL-quoted before use.
    :return: The JSON returned by the "Frames/<frame_id>" endpoint.
    """
    quoted_id = urllib.quote(frame_id)
    return H2OConnection.get_json("Frames/" + quoted_id)
Esempio n. 47
0
def frame(key):
  """
  Retrieve metadata for a key that points to a Frame.

  :param key: A pointer to a Frame in H2O.
  :return: Meta information on the frame
  """
  # Function-scope import so the block does not depend on a module-level import.
  import urllib
  # Escape the key so characters such as spaces, '?' or '#' cannot corrupt the request path.
  return H2OConnection.get_json("Frames/" + urllib.quote(key))
Esempio n. 48
0
def download_csv(data, filename):
  """
  Download an H2O data set to a CSV file on the local disk.
  Warning: Files located on the H2O server may be very large! Make
  sure you have enough hard drive space to accommodate the entire file.

  :param data: an H2OFrame object to be downloaded.
  :param filename: A string indicating the path the CSV file should be saved to.
  :return: None
  :raises ValueError: if `data` is not an H2OFrame.
  """
  if not isinstance(data, H2OFrame):
    # str + type would raise TypeError, so build the message with format().
    raise ValueError("`data` argument must be an H2OFrame, but got {0}".format(type(data)))
  url = "http://{}:{}/3/DownloadDataset?frame_id={}".format(H2OConnection.ip(), H2OConnection.port(), data._id)
  # Request before opening the target, so an HTTP failure cannot truncate an existing file.
  response = urllib2.urlopen(url)
  try:
    # 'wb' keeps the downloaded bytes untouched on every platform.
    with open(filename, 'wb') as f:
      # Copy chunk by chunk; the frame may be far larger than available memory.
      for chunk in iter(lambda: response.read(65536), b""):
        f.write(chunk)
  finally:
    response.close()
Esempio n. 49
0
def _model_build(x, y, validation_x, validation_y, algo_url, kwargs):
    # Basic sanity checking
    if not x: raise ValueError("Missing features")
    x = _check_frame(x, y, y)
    if validation_x:
        validation_x = _check_frame(validation_x, validation_y, y)

    # Send frame descriptions to H2O cluster
    train_key = x.send_frame()
    kwargs['training_frame'] = train_key
    if validation_x:
        valid_key = validation_x.send_frame()
        kwargs['validation_frame'] = valid_key

    if y:
        kwargs['response_column'] = y._name

    # launch the job and poll
    job = H2OJob(H2OConnection.post_json("ModelBuilders/" + algo_url,
                                         **kwargs),
                 job_type=(algo_url + " Model Build")).poll()
    model_json = H2OConnection.get_json("Models/" + job.dest_key)["models"][0]
    model_type = model_json["output"]["model_category"]
    if model_type == "Binomial":
        from model.binomial import H2OBinomialModel
        model = H2OBinomialModel(job.dest_key, model_json)

    elif model_type == "Clustering":
        from model.clustering import H2OClusteringModel
        model = H2OClusteringModel(job.dest_key, model_json)

    elif model_type == "Regression":
        from model.regression import H2ORegressionModel
        model = H2ORegressionModel(job.dest_key, model_json)

    else:
        print model_type
        raise NotImplementedError

    # Cleanup
    h2o.remove(train_key)
    if validation_x:
        h2o.remove(valid_key)

    return model
Esempio n. 50
0
def init(ip="localhost", port=54321):
  """
  Open an H2O connection by constructing an H2OConnection.

  :param ip: An IP address or host name, "localhost" by default.
  :param port: The port to connect to, 54321 by default.
  :return: None
  """
  # The connection object is deliberately not returned to the caller.
  H2OConnection(ip=ip, port=port)
Esempio n. 51
0
def parse_setup(raw_frames, column_types=None):
    """
    Post a ParseSetup request for the given imported frames.

    :param raw_frames: A collection of imported file frames, or a single
        frame id given as a string.
    :param column_types: (Optional) A collection of column types to send
        along with the request; omitted from the request when None.
    :return: A ParseSetup "object"
    """
    # Wrap a lone id in a list. Byte strings must be covered as well as
    # unicode, otherwise a plain str would be iterated character by character.
    if isinstance(raw_frames, (str, unicode)):
        raw_frames = [raw_frames]

    # The H2O backend only accepts things that are quoted
    params = {
        "source_frames": [_quoted(frame_id) for frame_id in raw_frames],
    }
    if column_types is not None:
        params["column_types"] = [_quoted(col_type) for col_type in column_types]
    return H2OConnection.post_json(url_suffix="ParseSetup", **params)
Esempio n. 52
0
def get_frame(frame_id):
    """
    Build an H2OFrame from the metadata reported for `frame_id`.

    :param frame_id: The id of the frame to fetch; it is URL-quoted before use.
    :return: An H2OFrame built from the reported column labels and vec ids.
    :raises ValueError: if `frame_id` is None.
    """
    if frame_id is None:
        raise ValueError("frame_id must not be None")
    # Only the first entry of the "frames" list is used.
    frame_json = H2OConnection.get_json("Frames/" + urllib.quote(frame_id))["frames"][0]
    labels = [column["label"] for column in frame_json["columns"]]
    vec_keys = frame_json["vec_ids"]
    return H2OFrame(vecs=H2OVec.new_vecs(zip(labels, vec_keys), frame_json["rows"]))
Esempio n. 53
0
File: h2o.py Progetto: yuecong/h2o-3
def get_frame(frame_id):
  """
  Fetch a frame's metadata and wrap it as an H2OFrame.

  :param frame_id: The id of the frame to fetch; it is URL-quoted before use.
  :return: An H2OFrame whose vecs pair each column label with its vec id.
  :raises ValueError: if `frame_id` is None.
  """
  if frame_id is None:
    raise ValueError("frame_id must not be None")
  response = H2OConnection.get_json("Frames/" + urllib.quote(frame_id))
  meta = response["frames"][0]  # only the first reported frame is used
  labels = [col["label"] for col in meta["columns"]]
  vec_ids = meta["vec_ids"]
  new_vecs = H2OVec.new_vecs(zip(labels, vec_ids), meta["rows"])
  return H2OFrame(vecs=new_vecs)
Esempio n. 54
0
def parse(setup, h2o_name, first_line_is_header=(-1, 0, 1)):
    """
    Trigger a blocking parse of previously imported files.

    :param setup: The result of calling parse_setup. It is not modified.
    :param h2o_name: The name of the H2O Frame on the back end.
    :param first_line_is_header: -1 means data, 0 means guess, 1 means header.
        When left at its tuple default, the `check_header` value from
        `setup` is used.
    :return: The `jobs` attribute of the polled parse job.
    """
    # Work on a shallow copy: the quoted lists built below replace entries in
    # this copy, so the caller's setup is left alone and can be reused
    # without its names being quoted a second time.
    setup = dict(setup)

    # Parse parameters (None values provided by setup)
    p = {
        'destination_frame': h2o_name,
        'parse_type': None,
        'separator': None,
        'single_quotes': None,
        'check_header': None,
        'number_columns': None,
        'chunk_size': None,
        'delete_on_done': True,
        'blocking': True,
        'remove_frame': True
    }
    # The tuple default is a sentinel meaning "not specified by the caller".
    if isinstance(first_line_is_header, tuple):
        first_line_is_header = setup["check_header"]

    # Each optional list is quoted, and its key registered in `p` so the
    # update below copies it across.
    if setup["column_names"]:
        setup["column_names"] = [
            _quoted(name) for name in setup["column_names"]
        ]
        p["column_names"] = None

    if setup["column_types"]:
        setup["column_types"] = [
            _quoted(name) for name in setup["column_types"]
        ]
        p["column_types"] = None

    if setup["na_strings"]:
        # One list of NA strings per column; a None column becomes [].
        setup["na_strings"] = [[_quoted(na)
                                for na in col] if col is not None else []
                               for col in setup["na_strings"]]
        p["na_strings"] = None

    # update the parse parameters with the parse_setup values
    p.update({k: v for k, v in setup.items() if k in p})

    p["check_header"] = first_line_is_header

    # Extract only 'name' from each src in the array of srcs
    p['source_frames'] = [
        _quoted(src['name']) for src in setup['source_frames']
    ]

    # Request blocking parse
    j = H2OJob(H2OConnection.post_json(url_suffix="Parse", **p),
               "Parse").poll()
    return j.jobs
Esempio n. 55
0
def load_model(path):
  """
  Load a saved H2O model from disk.

  :param path: The full path of the H2O Model to be imported.
  :return: the model
  :raises ValueError: if `path` is not a non-empty string.
  """
  # Reject the empty string too, as the error message has always promised.
  if not isinstance(path, str) or not path:
    raise ValueError("`path` must be a non-empty character string")
  res = H2OConnection.post("Models.bin/", dir=path, _rest_version=99)
  # The response lists the imported models; the first one's id is looked up.
  return get_model(res.json()['models'][0]['model_id']['name'])
Esempio n. 56
0
File: h2o.py Progetto: yuecong/h2o-3
def frame_summary(key):
  """
  Fetch the summary H2O reports for a Frame/Vec key.

  :param key: A pointer to a Frame/Vec in H2O; it is URL-quoted before use.
  :return: The JSON returned by the "Frames/<key>/summary" endpoint.
  """
  summary_endpoint = "Frames/" + urllib.quote(key) + "/summary"
  return H2OConnection.get_json(summary_endpoint)
Esempio n. 57
0
File: h2o.py Progetto: yuecong/h2o-3
def parse_setup(raw_frames):
  """
  Post a ParseSetup request for the given imported frames.

  :param raw_frames: A collection of imported file frames, or a single frame id given as a string.
  :return: A ParseSetup "object"
  """
  # Wrap a lone id in a list. A byte string must be handled like unicode,
  # otherwise it would be split into one "frame" per character.
  if isinstance(raw_frames, (str, unicode)):
    raw_frames = [raw_frames]
  # The H2O backend only accepts things that are quoted
  quoted_frames = [_quoted(frame_id) for frame_id in raw_frames]
  return H2OConnection.post_json(url_suffix="ParseSetup", source_frames=quoted_frames)
Esempio n. 58
0
File: h2o.py Progetto: yuecong/h2o-3
def rapids(expr):
  """
  Send a Rapids expression for evaluation.

  :param expr: The rapids expression (ascii string); it is URL-quoted before posting.
  :return: The JSON response of the Rapids execution
  :raises EnvironmentError: if the response carries a non-None 'error' entry.
  """
  response = H2OConnection.post_json("Rapids", ast=urllib.quote(expr))
  error = response['error']
  if error is None:
    return response
  raise EnvironmentError("rapids expression not evaluated: {0}".format(str(error)))
Esempio n. 59
0
def parse_setup(rawkey):
  """
  Post a ParseSetup request for the given imported file keys.

  :param rawkey: A collection of imported file keys, or a single key given as a string.
  :return: A ParseSetup "object"
  :raises ValueError: if the response's 'is_valid' entry is falsy.
  """
  # Wrap a lone key in a list. A byte string must be handled like unicode,
  # otherwise it would be split into one "key" per character.
  if isinstance(rawkey, (str, unicode)):
    rawkey = [rawkey]
  # The H2O backend only accepts things that are quoted
  j = H2OConnection.post_json(url_suffix="ParseSetup", source_keys=[_quoted(key) for key in rawkey])
  if not j['is_valid']:
    raise ValueError("ParseSetup not Valid", j)
  return j