Example 1
  def model_performance(self, test_data=None, train=False, valid=False):
    """
    Generate model metrics for this model on test_data.

    :param test_data: Data set against which model metrics are computed. Both the train and valid arguments are ignored if test_data is not None.
    :param train: Report the training metrics for the model. This is the default if neither train nor valid is set.
    :param valid: Report the validation metrics for the model. If both train and valid are True, the training metrics are returned.
    :return: An object of class H2OModelMetrics.
    """
    if test_data is None:
      if not train and not valid:
        train = True  # default to train

      if train:
        return self._model_json["output"]["training_metrics"]

      if valid:
        return self._model_json["output"]["validation_metrics"]

    else:  # cases dealing with test_data not None
      if not isinstance(test_data, H2OFrame):
        raise ValueError("`test_data` must be of type H2OFrame. Got: " + str(type(test_data)))
      fr_key = H2OFrame.send_frame(test_data)
      res = H2OConnection.post_json("ModelMetrics/models/" + self._key + "/frames/" + fr_key)
      h2o.removeFrameShallow(fr_key)

      # FIXME need to do the client-side filtering...  PUBDEV-874:   https://0xdata.atlassian.net/browse/PUBDEV-874
      raw_metrics = None
      for mm in res["model_metrics"]:
        if mm["frame"]["name"] == fr_key:
          raw_metrics = mm
          break
      return self._metrics_class(raw_metrics,algo=self._model_json["algo"])
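A minimal usage sketch (not part of the original example; `model` and `test` are assumed to be a trained H2O model and an H2OFrame already present in the session):

  # With an explicit test frame, train/valid are ignored and metrics are computed on it.
  perf = model.model_performance(test_data=test)
  # With no test_data, the stored training metrics are returned by default,
  train_metrics = model.model_performance()
  # or the validation metrics can be requested explicitly.
  valid_metrics = model.model_performance(valid=True)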
Example 2
  def predict(self, test_data):
    """
    Predict on a dataset.

    :param test_data: Data to be predicted on.
    :return: A new H2OFrame filled with predictions.
    """
    if not test_data: raise ValueError("Must specify test data")
    # cbind the test_data vecs together and produce a temp key
    test_data_key = H2OFrame.send_frame(test_data)
    # get the predictions
    # this job call is blocking
    j = H2OConnection.post_json("Predictions/models/" + self._key + "/frames/" + test_data_key)
    # toast the cbound frame
    h2o.removeFrameShallow(test_data_key)
    # retrieve the prediction frame
    prediction_frame_key = j["model_metrics"][0]["predictions"]["frame_id"]["name"]
    # get the actual frame metadata
    pred_frame_meta = h2o.frame(prediction_frame_key)["frames"][0]
    # toast the prediction frame
    h2o.removeFrameShallow(prediction_frame_key)
    # collect the vec_ids
    vec_ids = pred_frame_meta["vec_ids"]
    # get the number of rows
    rows = pred_frame_meta["rows"]
    # get the column names
    cols = [col["label"] for col in pred_frame_meta["columns"]]
    # create a set of H2OVec objects
    vecs = H2OVec.new_vecs(zip(cols, vec_ids), rows)
    # return a new H2OFrame object
    return H2OFrame(vecs=vecs)
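A short usage sketch, assuming a trained H2O model `model` and an H2OFrame `test` (both names are illustrative):

  # Score the frame; the result is a new H2OFrame holding one prediction per row.
  preds = model.predict(test)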
Example 3
  def predict(self, test_data):
    """
    Predict on a dataset.

    :param test_data: Data to be predicted on.
    :return: A new H2OFrame filled with predictions.
    """
    if not test_data: raise ValueError("Must specify test data")
    # cbind the test_data vecs together and produce a temp key
    test_data_key = H2OFrame.send_frame(test_data)
    # get the predictions
    # this job call is blocking
    j = H2OConnection.post_json("Predictions/models/" + self._key + "/frames/" + test_data_key)
    # toast the cbound frame
    h2o.removeFrameShallow(test_data_key)
    # retrieve the prediction frame
    prediction_frame_key = j["predictions_frame"]["name"]
    # get the actual frame metadata
    pred_frame_meta = h2o.frame(prediction_frame_key)["frames"][0]
    # toast the prediction frame
    h2o.removeFrameShallow(prediction_frame_key)
    # collect the vec_ids
    vec_ids = pred_frame_meta["vec_ids"]
    # get the number of rows
    rows = pred_frame_meta["rows"]
    # get the column names
    cols = [col["label"] for col in pred_frame_meta["columns"]]
    # create a set of H2OVec objects
    vecs = H2OVec.new_vecs(zip(cols, vec_ids), rows)
    # return a new H2OFrame object
    return H2OFrame(vecs=vecs)
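This variant differs from the previous predict only in where it finds the prediction frame id: it reads the top-level predictions_frame field of the Predictions response directly instead of extracting it from model_metrics, which suggests a slightly newer REST response shape; the frame reconstruction that follows is identical.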
Example 4
  def model_performance(self, test_data=None, train=False, valid=False):
    """
    Generate model metrics for this model on test_data.

    :param test_data: Data set against which model metrics are computed. Both the train and valid arguments are ignored if test_data is not None.
    :param train: Report the training metrics for the model. This is the default if neither train nor valid is set.
    :param valid: Report the validation metrics for the model. If both train and valid are True, the training metrics are returned.
    :return: An object of class H2OModelMetrics.
    """
    if test_data is None:
      if not train and not valid:
        train = True  # default to train

      if train:
        return self._model_json["output"]["training_metrics"]

      if valid:
        return self._model_json["output"]["validation_metrics"]

    else:  # cases dealing with test_data not None
      if not isinstance(test_data, H2OFrame):
        raise ValueError("`test_data` must be of type H2OFrame. Got: " + str(type(test_data)))
      fr_key = H2OFrame.send_frame(test_data)
      res = H2OConnection.post_json("ModelMetrics/models/" + self._key + "/frames/" + fr_key)
      h2o.removeFrameShallow(fr_key)

      # FIXME need to do the client-side filtering...  PUBDEV-874:   https://0xdata.atlassian.net/browse/PUBDEV-874
      raw_metrics = None
      for mm in res["model_metrics"]:
        if mm["frame"]["name"] == fr_key:
          raw_metrics = mm
          break
      return self._metrics_class(raw_metrics,algo=self._model_json["algo"])
Example 5
  def deepfeatures(self, test_data, layer):
    """
    Return hidden layer details (deep features) for test_data.

    :param test_data: Data to create a feature space on.
    :param layer: 0-based index of the hidden layer to extract features from.
    :return: An H2OFrame of deep features.
    """
    if not test_data: raise ValueError("Must specify test data")
    # create test_data by cbinding vecs
    test_data_key = H2OFrame.send_frame(test_data)
    # get the deepfeatures of the dataset
    j = H2OConnection.post_json("Predictions/models/" + self._key + "/frames/" + test_data_key, deep_features_hidden_layer=layer)
    # retrieve the frame data
    deepfeatures_frame_key = j["predictions_frame"]["name"]
    df_frame_meta = h2o.frame(deepfeatures_frame_key)["frames"][0]
    # create vecs by extracting vec_ids, col length, and col names
    vec_ids = df_frame_meta["vec_ids"]
    rows = df_frame_meta["rows"]
    cols = [col["label"] for col in df_frame_meta["columns"]]
    vecs = H2OVec.new_vecs(zip(cols, vec_ids), rows)
    # remove test data from kv
    h2o.removeFrameShallow(test_data_key)
    # finally return frame
    return H2OFrame(vecs=vecs)
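A usage sketch, assuming a trained deep learning model `dl_model` and an H2OFrame `test` (illustrative names):

  # Extract the activations of the first hidden layer; layer indices are 0-based.
  deep_features = dl_model.deepfeatures(test, 0)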
Example 6
  def deepfeatures(self, test_data, layer):
    """
    Return hidden layer details (deep features) for test_data.

    :param test_data: Data to create a feature space on.
    :param layer: 0-based index of the hidden layer to extract features from.
    :return: An H2OFrame of deep features.
    """
    if not test_data: raise ValueError("Must specify test data")
    # create test_data by cbinding vecs
    test_data_key = H2OFrame.send_frame(test_data)
    # get the deepfeatures of the dataset
    j = H2OConnection.post_json("Predictions/models/" + self._key + "/frames/" + test_data_key, deep_features_hidden_layer=layer)
    # retrieve the frame data
    deepfeatures_frame_key = j["predictions_frame"]["name"]
    df_frame_meta = h2o.frame(deepfeatures_frame_key)["frames"][0]
    # create vecs by extracting vec_ids, col length, and col names
    vec_ids = df_frame_meta["vec_ids"]
    rows = df_frame_meta["rows"]
    cols = [col["label"] for col in df_frame_meta["columns"]]
    vecs = H2OVec.new_vecs(zip(cols, vec_ids), rows)
    # remove test data from kv
    h2o.removeFrameShallow(test_data_key)
    # finally return frame
    return H2OFrame(vecs=vecs)
Example 7
def _model_build(x,y,validation_x,validation_y,algo_url,kwargs):
  # Basic sanity checking
  if algo_url == "autoencoder":
    if "autoencoder" in kwargs.keys():
      if kwargs["autoencoder"]:
        if y:
          raise ValueError("`y` should not be specified for autoencoder, remove `y` input.")
        algo_url="deeplearning"
  if not x:  raise ValueError("Missing features")
  x = _check_frame(x,y,y)
  if validation_x:
    validation_x = _check_frame(validation_x,validation_y,y)

  # Send frame descriptions to H2O cluster
  train_key = x.send_frame()
  kwargs['training_frame']=train_key
  if validation_x is not None:
    valid_key = validation_x.send_frame()
    kwargs['validation_frame']=valid_key

  if y:
    kwargs['response_column']=y._name

  kwargs = dict([(k, kwargs[k]) for k in kwargs if kwargs[k] is not None])

  # launch the job and poll
  job = H2OJob(H2OConnection.post_json("ModelBuilders/"+algo_url, **kwargs), job_type=(algo_url+" Model Build")).poll()
  model_json = H2OConnection.get_json("Models/"+job.dest_key)["models"][0]
  model_type = model_json["output"]["model_category"]
  if model_type=="Binomial":
    from model.binomial import H2OBinomialModel
    model = H2OBinomialModel(job.dest_key,model_json)

  elif model_type=="Clustering":
    from model.clustering import H2OClusteringModel
    model = H2OClusteringModel(job.dest_key,model_json)

  elif model_type=="Regression":
    from model.regression import H2ORegressionModel
    model = H2ORegressionModel(job.dest_key,model_json)

  elif model_type=="Multinomial":
    from model.multinomial import H2OMultinomialModel
    model = H2OMultinomialModel(job.dest_key,model_json)

  elif model_type=="AutoEncoder":
    from model.autoencoder import H2OAutoEncoderModel
    model = H2OAutoEncoderModel(job.dest_key,model_json)

  elif model_type=="DimReduction":
    from model.dim_reduction import H2ODimReductionModel
    model = H2ODimReductionModel(job.dest_key,model_json)

  else:
    print(model_type)
    raise NotImplementedError

  # Cleanup
  h2o.removeFrameShallow(train_key)
  if validation_x:
    h2o.removeFrameShallow(valid_key)

  return model
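An illustrative call, assuming `train_x` is an H2OFrame of predictors, `response` is the response H2OVec, and the options dict carries ordinary GBM parameters; in practice this helper is reached through the public algorithm wrappers rather than called directly:

  params = {"ntrees": 50, "max_depth": 5}
  gbm_model = _model_build(train_x, response, None, None, "gbm", params)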
Example 8
def _model_build(x, y, validation_x, validation_y, algo_url, kwargs):
    # Basic sanity checking
    if algo_url == "autoencoder":
        if "autoencoder" in kwargs.keys():
            if kwargs["autoencoder"]:
                if y:
                    raise ValueError(
                        "`y` should not be specified for autoencoder, remove `y` input."
                    )
                algo_url = "deeplearning"
    if not x: raise ValueError("Missing features")
    x = _check_frame(x, y, y)
    if validation_x:
        validation_x = _check_frame(validation_x, validation_y, y)

    # Send frame descriptions to H2O cluster
    train_key = x.send_frame()
    kwargs['training_frame'] = train_key
    if validation_x is not None:
        valid_key = validation_x.send_frame()
        kwargs['validation_frame'] = valid_key

    if y:
        kwargs['response_column'] = y._name

    kwargs = dict([(k, kwargs[k]) for k in kwargs if kwargs[k] is not None])

    # launch the job and poll
    job = H2OJob(H2OConnection.post_json("ModelBuilders/" + algo_url,
                                         **kwargs),
                 job_type=(algo_url + " Model Build")).poll()
    model_json = H2OConnection.get_json("Models/" + job.dest_key)["models"][0]
    model_type = model_json["output"]["model_category"]
    if model_type == "Binomial":
        from model.binomial import H2OBinomialModel
        model = H2OBinomialModel(job.dest_key, model_json)

    elif model_type == "Clustering":
        from model.clustering import H2OClusteringModel
        model = H2OClusteringModel(job.dest_key, model_json)

    elif model_type == "Regression":
        from model.regression import H2ORegressionModel
        model = H2ORegressionModel(job.dest_key, model_json)

    elif model_type == "Multinomial":
        from model.multinomial import H2OMultinomialModel
        model = H2OMultinomialModel(job.dest_key, model_json)

    elif model_type == "AutoEncoder":
        from model.autoencoder import H2OAutoEncoderModel
        model = H2OAutoEncoderModel(job.dest_key, model_json)

    else:
        print(model_type)
        raise NotImplementedError

    # Cleanup
    h2o.removeFrameShallow(train_key)
    if validation_x:
        h2o.removeFrameShallow(valid_key)

    return model