def _x_y_to_h2o_frame(X, y, sample_weight, params, X_valid, y_valid, sample_weight_valid): if isinstance(X, (DataFrame, Series)) & isinstance( y, (DataFrame, Series)): features = X.columns.tolist() if isinstance(X, DataFrame) else X.name target = y.columns.tolist() if isinstance(y, DataFrame) else y.name if (sample_weight is not None) & isinstance( sample_weight, (DataFrame, Series)): params['offset_column'] = (sample_weight.columns.tolist() if isinstance(sample_weight, DataFrame) else sample_weight.name) X = concat([X, sample_weight], axis=1) train_set = H2OFrame(concat([X, y], axis=1)) else: raise TypeError( 'X, y are supposed to be pandas DataFrame or Series') if (X_valid is not None) & (y_valid is not None): if isinstance(X_valid, (DataFrame, Series)) & isinstance( y_valid, (DataFrame, Series)): if ((sample_weight_valid is not None) & isinstance(sample_weight_valid, (DataFrame, Series)) & (sample_weight is not None)): X_valid = concat([X_valid, sample_weight_valid], axis=1) valid_set = H2OFrame(concat([X_valid, y_valid], axis=1)) params['validation_frame'] = valid_set else: raise TypeError( 'X_valid, y_valid are supposed to be pandas DataFrame or Series' ) return features, target, train_set, params
def col_names_check():
    """Check the column names (and, for Python-built frames, types) H2O reports.

    Covers a CSV with a header row, a CSV without one, and frames created from
    Python lists and dicts with and without explicit column names.
    """
    # CSV with a header row.
    headed = h2o.import_file(pyunit_utils.locate("smalldata/iris/iris_wheader.csv"))
    wanted = ["sepal_len", "sepal_wid", "petal_len", "petal_wid", "class"]
    assert headed.col_names == wanted, \
        "Expected {0} for column names but got {1}".format(wanted, headed.col_names)

    # CSV without a header row.
    headless = h2o.import_file(pyunit_utils.locate("smalldata/iris/iris.csv"))
    wanted = ["C1", "C2", "C3", "C4", "C5"]
    assert headless.col_names == wanted, \
        "Expected {0} for column names but got {1}".format(wanted, headless.col_names)

    # List of lists with explicit column names and types.
    frame = H2OFrame.from_python(np.random.randn(100, 4).tolist(),
                                 column_names=list("ABCD"),
                                 column_types=["enum"] * 4)
    frame.head()
    wanted = list("ABCD")
    wanted_types = ["enum"] * 4
    assert frame.col_names == wanted, \
        "Expected {} for column names but got {}".format(wanted, frame.col_names)
    assert list(frame.types.values()) == wanted_types, \
        "Expected {} for column types but got {}".format(wanted_types, frame.types)

    # List of lists with nothing specified.
    frame = H2OFrame(np.random.randn(100, 4).tolist())
    frame.head()
    wanted = ["C1", "C2", "C3", "C4"]
    wanted_types = ["real"] * 4
    assert frame.col_names == wanted, \
        "Expected {} for column names but got {}".format(wanted, frame.col_names)
    assert list(frame.types.values()) == wanted_types, \
        "Expected {} for column types but got {}".format(wanted_types, frame.types)

    # Dict input: the key becomes the column name.
    frame = H2OFrame({'B': ['a', 'a', 'b', 'NA', 'NA']})
    frame.head()
    assert frame.col_names == ["B"], \
        "Expected {} for column names but got {}".format(["B"], frame.col_names)

    # Dict input with explicit column names.
    frame = H2OFrame.from_python({'B': ['a', 'a', 'b', 'NA', 'NA']}, column_names=["X"])
    frame.head()
    assert frame.col_names == ["X"], \
        "Expected {} for column names but got {}".format(["X"], frame.col_names)
def pubdev_6360():
    """Check conditional assignment into a column of an H2OFrame built from pandas.

    First expects the assignment into the (enum) ``testcolumn`` to fail with a
    specific message, then repeats the assignments after converting the column
    with ``ascharacter()`` and compares the result with the expected pandas
    frame.
    """
    source = [[1, 'Peter', 'blah'],
              [2, 'Carl', ''],
              [3, 'Maria', 'whatever'],
              [4, 'Cindy', None]]
    expected = [[1, 'Peter', 1],
                [2, 'Carl', 0],
                [3, 'Maria', 1],
                [4, 'Cindy', 0]]
    columns = ['ID', 'Name', 'testcolumn']
    sourcePandasFrame = pd.DataFrame(source, columns=columns)
    expectedPandasFrame = pd.DataFrame(expected, columns=columns)

    h2oFrame = H2OFrame(sourcePandasFrame)
    h2oFrame[h2oFrame['testcolumn'] != '', 'testcolumn'] = '1'
    try:
        h2oFrame[h2oFrame['testcolumn'] == '', 'testcolumn'] = '0'
    except Exception as e:
        assert 'Cannot assign value 1 into a vector of type Enum.' == e.args[0].msg, \
            "H2O Frame operation failed on an unexpected error"
    else:
        # Kept out of the ``try`` body: inside it the AssertionError would be
        # caught by ``except Exception`` and resurface as an AttributeError
        # on ``e.args[0].msg``.
        raise AssertionError("H2O Frame operation should fail on an enum column")

    h2oFrame = H2OFrame(sourcePandasFrame)
    h2oFrame['testcolumn'] = h2oFrame['testcolumn'].ascharacter()
    h2oFrame[h2oFrame['testcolumn'] != '', 'testcolumn'] = '1'
    h2oFrame[h2oFrame['testcolumn'] == '', 'testcolumn'] = '0'
    h2oFrame['testcolumn'] = h2oFrame['testcolumn'].asfactor()

    assert_frame_equal(h2oFrame.as_data_frame(use_pandas=True), expectedPandasFrame)
def test_sw_602_endpoints_equality():
    """Check that the full and light ``get_frame`` results carry equal cache fields."""
    # 99 rows, each holding the integers 0..49999.
    data = [numpy.arange(0, 50000).tolist() for _ in range(99)]
    fr = h2o.H2OFrame(data)

    full_cache = H2OFrame.get_frame(fr.frame_id)._ex._cache
    light_cache = H2OFrame.get_frame(fr.frame_id, light=True)._ex._cache

    for field in ("_id", "_nrows", "_ncols", "_names", "_data", "_l"):
        assert getattr(full_cache, field) == getattr(light_cache, field)
def test_sw_602_endpoints_equality():
    """Check that the full and light ``get_frame`` results carry equal cache fields."""
    # 99 rows, each holding the integers 0..49999.
    data = [numpy.arange(0, 50000).tolist() for _ in range(99)]
    fr = h2o.H2OFrame(data)

    full_cache = H2OFrame.get_frame(fr.frame_id)._ex._cache
    light_cache = H2OFrame.get_frame(fr.frame_id, light=True)._ex._cache

    for field in ("_id", "_nrows", "_ncols", "_names", "_data", "_l"):
        assert getattr(full_cache, field) == getattr(light_cache, field)
def transform(self, is_train_or_valid, frame=None, holdout_type=None, noise=-1, seed=-1):
    """
    Apply target encoding to `te_columns` using the encoding maps generated by `TargetEncoder.fit()`.

    Encodings must not be passed in manually: they are stored internally once `.fit()` has been called.

    :param bool is_train_or_valid: explicitly specify type of the data.
    :param frame frame: the frame to which target encoding transformations are applied.
    :param str holdout_type: Supported options:

        1) "kfold" - encodings for a fold are generated based on out-of-fold data.
        2) "loo" - leave one out. Current row's response value is subtracted from the
           pre-calculated per-level frequencies.
        3) "none" - we do not holdout anything. Using whole frame for training
    :param float noise: amount of noise to add to the final target encodings.
    :param int seed: set to fixed value for reproducibility.
    :returns: the result of ``H2OFrame._expr`` for the "target.encoder.transform" expression.
    """
    assert_is_type(holdout_type, "kfold", "loo", "none")

    # We need to make sure that frames are being sent in the same order
    map_keys = self._encodingMap.map_keys['string']
    assert map_keys == self._teColumns
    frame_keys = [entry['key']['name'] for entry in self._encodingMap.frames]

    transform_expr = ExprNode("target.encoder.transform",
                              map_keys, frame_keys, frame, self._teColumns, holdout_type,
                              self._responseColumnName, self._foldColumnName,
                              self._blending, self._inflectionPoint, self._smoothing,
                              noise, seed, is_train_or_valid)
    return H2OFrame._expr(expr=transform_expr)
def result(self):
    """
    Get the result frame holding information about the model building process
    (as used for modelselection and anovaglm).

    :return: the H2OFrame produced by evaluating the "result" expression for this model's key.
    """
    result_expr = ExprNode("result", ASTId(self.key))
    lazy_frame = H2OFrame._expr(expr=result_expr)
    return lazy_frame._frame(fill_cache=True)
def as_frame(self):
    """
    Convert this collection of models to a tabular representation.

    :returns: An H2OFrame; the first columns identify the input segments, the
        remaining columns describe the built models.
    """
    models_expr = ExprNode("segment_models_as_frame", ASTId(self._segment_models_id))
    lazy_frame = H2OFrame._expr(expr=models_expr)
    return lazy_frame._frame(fill_cache=True)
def baseline_survival_frame(self):
    """Return the baseline survival frame referenced by the model JSON, if any.

    :return: the frame fetched with ``H2OFrame.get_frame`` for the name stored
        under ``output -> baseline_survival -> name``; ``None`` when the model
        JSON is missing or holds no such name.
    """
    if self._model_json is None:
        return None
    # ``or {}`` also covers keys that are present with a null value, which
    # would otherwise break the chained lookups with an AttributeError.
    output = self._model_json.get("output") or {}
    frame_name = (output.get("baseline_survival") or {}).get("name")
    if frame_name is None:
        return None
    return H2OFrame.get_frame(frame_name)
def __init__(self, data, features, families="poisson", max_depth=10, iterations=1):
    """Fit one gradient boosting model per non-constant feature.

    Each feature with non-zero variance is modelled from all other features
    with an ``H2OGradientBoostingEstimator``.

    :param data: 2-D array-like exposing ``shape``; rows are samples, columns
        are features (the column count must equal ``len(features)``).
    :param features: sequence of feature names, one per column of ``data``.
    :param families: distribution name shared by all features, or a sequence
        with one distribution name per feature. Previously this argument was
        normalised but ignored and "poisson" was always used.
    :param max_depth: ``max_depth`` passed to every estimator.
    :param iterations: ``ntrees`` passed to every estimator.
    """
    self.data = data
    self.nD = data.shape[0]
    self.nF = data.shape[1]
    self.config = {"max_depth": max_depth, "iterations": iterations}
    self.vars = numpy.var(data, 0)
    self.means = numpy.mean(data, 0)

    assert self.nF == len(features)
    self.features = features

    # A single family name applies to every feature.
    if isinstance(families, str):
        families = [families] * self.nF

    updata = H2OFrame(numpytoordereddict(self.data, self.features))

    self.models = {}
    for i, feature in enumerate(self.features):
        # A constant column carries nothing to learn; skip it.
        if self.vars[i] == 0:
            continue
        self.models[feature] = H2OGradientBoostingEstimator(
            distribution=families[i], ntrees=iterations, max_depth=max_depth)
        self.models[feature].train(
            x=[f for f in self.features if f != feature],
            y=feature,
            training_frame=updata)
def predict(self, X, sample_weight=None, **kwargs):
    """Predict using GLM with feature matrix X.

    Args:
        X (:obj:`pd.DataFrame`, :obj:`pd.Series`): Samples.
        sample_weight (:obj:`pd.DataFrame`, :obj:`pd.Series`, optional): Test
            sample weights. With the h2o backend, when the model has an offset
            column and no weights are given, a zero-filled Series named after
            that column is used instead.
        **kwargs: Other parameters passed to
            H2OGeneralizedLinearEstimator.predict().

    Returns:
        array: Returns predicted values.

    Raises:
        NotImplementedError: if the backend/model combination is not supported.
    """
    # Logical ``and`` (not bitwise ``&``) so the model type is only inspected
    # for the matching backend.
    if self.backend == 'sklearn' and isinstance(self.model, Pipeline):
        if hasattr(self.model, 'feature_name_'):
            X = X[self.model.feature_name_]
        return self.model.predict(X)

    if self.backend == 'h2o' and isinstance(self.model, H2OGeneralizedLinearEstimator):
        offset = self.model.parms['offset_column']['actual_value']
        if offset is not None and sample_weight is None:
            # The model has an offset column, so the frame must contain it
            # even when the caller supplies no weights.
            sample_weight = Series(repeat(0, len(X)),
                                   name=offset['column_name'],
                                   index=X.index)
        if sample_weight is not None:
            X = concat([X, sample_weight], axis=1)
        h2o_predict = X if isinstance(X, H2OFrame) else H2OFrame(X)
        return self.model.predict(
            h2o_predict, **kwargs).as_data_frame().values.reshape(-1)

    raise NotImplementedError(
        f'Error with the backend choice. Supported backends: {self._backends}'
    )
def mapping_frame(self):
    """Return the mapping frame referenced by the model JSON, if any.

    :return: the frame fetched with ``H2OFrame.get_frame`` for the name stored
        under ``output -> mapping_frame -> name``; ``None`` when the model JSON
        is missing or holds no such name.
    """
    if self._model_json is None:
        return None
    # ``or {}`` also covers keys that are present with a null value, which
    # would otherwise break the chained lookups with an AttributeError.
    output = self._model_json.get("output") or {}
    frame_name = (output.get("mapping_frame") or {}).get("name")
    if frame_name is None:
        return None
    return H2OFrame.get_frame(frame_name)
def transform(self, column):
    """Encode ``column`` with the fitted ``encoder_`` and return it as a one-column H2OFrame.

    ``check_is_fitted`` is called first for the ``encoder_`` attribute.
    """
    check_is_fitted(self, 'encoder_')
    values = h2o_col_to_numpy(column)

    # transform--
    # I don't like that we have to re-upload... but we do... for now...
    encoded = self.encoder_.transform(values)
    as_single_column = encoded.reshape(values.shape[0], 1)
    return H2OFrame.from_python(as_single_column)
def _as_h2o_frame_from_RDD_Double(h2oContext, rdd, frame_name, full_cols=-1):
    """Convert ``rdd`` through the Java H2OContext and fetch the resulting frame by key.

    ``full_cols`` is forwarded to ``H2OFrame.get_frame``, which is called with
    ``light=True``.
    """
    java_rdd = rdd._to_java_object_rdd()
    frame_key = h2oContext._jhc.asH2OFrameFromPythonRDDDoubleKeyString(java_rdd, frame_name)
    return H2OFrame.get_frame(frame_key, full_cols=full_cols, light=True)
def event_log(self):
    """
    Retrieve the backend event log from an H2OAutoML object.

    :return: an H2OFrame with detailed events that occurred during the AutoML
        training; an H2OFrame built from an empty list when no event log is stored.
    """
    stored_log = self._event_log
    if stored_log is not None:
        return stored_log
    return H2OFrame([])
def from_pandas(X):
    """Thin wrapper around ``H2OFrame.from_python`` for pandas input.

    Takes a pandas dataframe and returns an H2OFrame built with the default
    arguments (generally enough) plus named columns.

    Parameters
    ----------
    X : pd.DataFrame
        The dataframe to convert.

    Returns
    -------
    H2OFrame
    """
    _validated, _ = validate_is_pd(X, None)

    # older version of h2o are super funky with this
    is_old_h2o = parse_version(h2o.__version__) < parse_version('3.10.0.7')
    header = 1 if is_old_h2o else 0

    # if h2o hasn't started, we'll let this fail through
    column_names = X.columns.tolist()
    return H2OFrame.from_python(X, header=header, column_names=column_names)
def transform(self, frame=None, holdout_type=None, noise=-1, seed=-1):
    """
    Apply target encoding to `te_columns` using the encoding maps generated by `TargetEncoder.fit()`.

    Encodings must not be passed in manually: they are stored internally once `.fit()` has been called.

    :param frame frame: the frame to which target encoding transformations are applied.
    :param str holdout_type: Supported options:

        1) "kfold" - encodings for a fold are generated based on out-of-fold data.
        2) "loo" - leave one out. Current row's response value is subtracted from the
           pre-calculated per-level frequencies.
        3) "none" - we do not holdout anything. Using whole frame for training
    :param float noise: the amount of random noise added to the target encoding; this helps
        prevent overfitting. The earlier documentation states a default of 0.01 * range of y;
        the signature default is -1 (presumably resolved by the backend - confirm).
    :param int seed: a random seed used to generate draws from the uniform distribution for
        random noise. Defaults to -1.
    :returns: the result of ``H2OFrame._expr`` for the "target.encoder.transform" expression.
    """
    assert_is_type(holdout_type, "kfold", "loo", "none")

    # We need to make sure that frames are being sent in the same order
    map_keys = self._encodingMap.map_keys['string']
    assert map_keys == self._teColumns
    frame_keys = [entry['key']['name'] for entry in self._encodingMap.frames]

    transform_expr = ExprNode("target.encoder.transform",
                              map_keys, frame_keys, frame, self._teColumns, holdout_type,
                              self._responseColumnName, self._foldColumnName,
                              self._blending, self._inflectionPoint, self._smoothing,
                              noise, seed)
    return H2OFrame._expr(expr=transform_expr)
def transform(self, frame=None, holdout_type=None, noise=-1, seed=-1):
    """
    Apply target encoding to `te_columns` using the encoding maps generated by `TargetEncoder.fit()`.

    Encodings must not be passed in manually: they are stored internally once `.fit()` has been called.

    :param frame frame: the frame to which target encoding transformations are applied.
    :param str holdout_type: Supported options:

        1) "kfold" - encodings for a fold are generated based on out-of-fold data.
        2) "loo" - leave one out. Current row's response value is subtracted from the
           pre-calculated per-level frequencies.
        3) "none" - we do not holdout anything. Using whole frame for training
    :param float noise: the amount of random noise added to the target encoding; this helps
        prevent overfitting. The earlier documentation states a default of 0.01 * range of y;
        the signature default is -1 (presumably resolved by the backend - confirm).
    :param int seed: a random seed used to generate draws from the uniform distribution for
        random noise. Defaults to -1.
    :returns: the result of ``H2OFrame._expr`` for the "target.encoder.transform" expression.
    """
    assert_is_type(holdout_type, "kfold", "loo", "none")

    # We need to make sure that frames are being sent in the same order
    map_keys = self._encodingMap.map_keys['string']
    assert map_keys == self._teColumns
    frame_keys = [entry['key']['name'] for entry in self._encodingMap.frames]

    transform_expr = ExprNode("target.encoder.transform",
                              map_keys, frame_keys, frame, self._teColumns, holdout_type,
                              self._responseColumnName, self._foldColumnName,
                              self._blending, self._inflectionPoint, self._smoothing,
                              noise, seed)
    return H2OFrame._expr(expr=transform_expr)
def train(self, x=None, y=None, training_frame=None, blending_frame=None, **kwargs):
    """Train the model, optionally with a blending frame.

    The blending frame is validated with ``H2OFrame._validate`` and is required
    only when no training frame is available (neither the argument nor
    ``self.training_frame``); in that case it also stands in as the training
    frame. Remaining arguments are forwarded to the parent ``_train``.
    """
    no_training_frame = training_frame is None and self.training_frame is None
    blending_frame = H2OFrame._validate(blending_frame, 'blending_frame',
                                        required=no_training_frame)
    if no_training_frame:
        # used to bypass default checks in super class and backend and to guarantee default metrics
        training_frame = blending_frame

    def _add_ensemble_parms(parms):
        # Invoked through ``extend_parms_fn`` with the parameter dict.
        if blending_frame is not None:
            parms['blending_frame'] = blending_frame
        if self.metalearner_fold_column is not None:
            parms['ignored_columns'].remove(quoted(self.metalearner_fold_column))

    super(self.__class__, self)._train(x, y, training_frame,
                                       extend_parms_fn=_add_ensemble_parms,
                                       **kwargs)
def from_java_h2o_frame(h2o_frame, h2o_frame_id):
    """Fetch the Python H2OFrame for a Java frame and attach the Java reference to it.

    :param h2o_frame: Java-side frame object to cache on the result.
    :param h2o_frame_id: Java-side key; its ``toString()`` value is the frame id.
    :return: the fetched frame with ``_java_frame``, ``_java_frame_sid`` and
        ``_backed_by_java_obj`` set.
    """
    frame_key = h2o_frame_id.toString()
    py_frame = H2OFrame.get_frame(frame_key)

    # Cache Java reference to the backend frame
    py_frame._java_frame = h2o_frame
    py_frame._java_frame_sid = frame_key
    py_frame._backed_by_java_obj = True
    return py_frame
def fit(self, fr, **fit_params):
    """Post this assembly's steps for frame ``fr`` and return the resulting frame.

    Each step is converted with ``to_rest``; double quotes inside the result
    are replaced by single quotes before quoting. The assembly name from the
    response is stored in ``self.id``.
    """
    rest_steps = [step[1].to_rest(step[0]) for step in self.steps]
    quoted_steps = [quoted(rest.replace('"', "'")) for rest in rest_steps]
    payload = {"steps": "[" + ",".join(quoted_steps) + "]", "frame": fr.frame_id}

    response = h2o.api("POST /99/Assembly", data=payload)
    self.id = response["assembly"]["name"]
    return H2OFrame.get_frame(response["result"]["name"])
def _as_h2o_frame_from_dataframe(h2oContext, dataframe, frame_name, full_cols=100):
    """Convert a Spark DataFrame through the Java H2OContext and wrap the result.

    ``full_cols`` is forwarded to ``H2OFrame.from_java_h2o_frame``.
    """
    java_frame = h2oContext._jhc.asH2OFrame(dataframe._jdf, frame_name)
    java_key = java_frame.key()
    return H2OFrame.from_java_h2o_frame(java_frame, java_key, full_cols)
def _as_h2o_frame_from_RDD_String(h2oContext, rdd, frame_name, full_cols=100):
    """Convert ``rdd`` through the Java H2OContext (string variant) and wrap the result.

    ``full_cols`` is forwarded to ``H2OFrame.from_java_h2o_frame``.
    """
    java_rdd = rdd._to_java_object_rdd()
    java_frame = h2oContext._jhc.asH2OFrameFromRDDString(java_rdd, frame_name)
    java_key = java_frame.key()
    return H2OFrame.from_java_h2o_frame(java_frame, java_key, full_cols)
def from_java_h2o_frame(h2o_frame, h2o_frame_id, full_cols=100):
    """Fetch the Python H2OFrame for a Java frame and attach the Java reference to it.

    :param h2o_frame: Java-side frame object to cache on the result.
    :param h2o_frame_id: Java-side key; its ``toString()`` value is the frame id.
    :param full_cols: passed to ``get_frame`` only when the Java frame has more
        columns than this value; otherwise ``-1`` is passed.
    :return: the frame fetched with ``light=True``, with ``_java_frame``,
        ``_java_frame_sid`` and ``_backed_by_java_obj`` set.
    """
    frame_key = h2o_frame_id.toString()
    if h2o_frame.numCols() > full_cols:
        cols_to_fetch = full_cols
    else:
        cols_to_fetch = -1
    py_frame = H2OFrame.get_frame(frame_key, full_cols=cols_to_fetch, light=True)

    # Cache Java reference to the backend frame
    py_frame._java_frame = h2o_frame
    py_frame._java_frame_sid = frame_key
    py_frame._backed_by_java_obj = True
    return py_frame
def _as_h2o_frame_from_complex_type(h2oContext, dataframe, framename):
    """Convert an RDD of tuple/list, a list or a pandas.DataFrame into an H2OFrame.

    The input is first turned into a Spark DataFrame via the context's
    ``_sqlContext``. ``framename`` is accepted but not used by this body.
    """
    # Creates a DataFrame from an RDD of tuple/list, list or pandas.DataFrame.
    # On scala backend, to transform RDD of Product to H2OFrame, we need to know Type Tag.
    # Since there is no alternative for Product class in Python, we first transform the rdd to dataframe
    # and then transform it to H2OFrame.
    spark_df = h2oContext._sqlContext.createDataFrame(dataframe)
    java_frame = h2oContext._jhc.asH2OFrame(spark_df._jdf)
    java_key = java_frame.key()
    return H2OFrame.from_java_h2o_frame(java_frame, java_key)
def _as_h2o_frame_from_complex_type(h2oContext, dataframe, frame_name, full_cols=100):
    """Convert an RDD of tuple/list, a list or a pandas.DataFrame into an H2OFrame.

    The input is first turned into a Spark DataFrame via the context's
    ``_spark_session``. ``full_cols`` is forwarded to
    ``H2OFrame.from_java_h2o_frame``.
    """
    # Creates a DataFrame from an RDD of tuple/list, list or pandas.DataFrame.
    # On scala backend, to transform RDD of Product to H2OFrame, we need to know Type Tag.
    # Since there is no alternative for Product class in Python, we first transform the rdd to dataframe
    # and then transform it to H2OFrame.
    spark_df = h2oContext._spark_session.createDataFrame(dataframe)
    java_frame = h2oContext._jhc.asH2OFrame(spark_df._jdf, frame_name)
    java_key = java_frame.key()
    return H2OFrame.from_java_h2o_frame(java_frame, java_key, full_cols)
def transform_frame(self, fr):
    """
    Return Xnew for a new dataset.

    GLRM performs A=X*Y during training. For a new dataset it performs
    Anew = Xnew*Y: predict returns Xnew*Y, while transform_frame returns Xnew.

    :param fr: the frame to transform; its ``key`` is sent to the backend.
    :return: an H2OFrame that contains Xnew.
    """
    transform_expr = ExprNode("transform", ASTId(self.key), ASTId(fr.key))
    lazy_frame = H2OFrame._expr(expr=transform_expr)
    return lazy_frame._frame(fill_cache=True)
def col_names_check():
    """Check the column names (and, for Python-built frames, types) H2O reports.

    Covers a CSV with a header row, a CSV without one, and frames created from
    Python lists and dicts with and without explicit column names.
    """
    # CSV with a header row.
    headed = h2o.import_file(pyunit_utils.locate("smalldata/iris/iris_wheader.csv"))
    wanted = ["sepal_len", "sepal_wid", "petal_len", "petal_wid", "class"]
    assert headed.col_names == wanted, \
        "Expected {0} for column names but got {1}".format(wanted, headed.col_names)

    # CSV without a header row.
    headless = h2o.import_file(pyunit_utils.locate("smalldata/iris/iris.csv"))
    wanted = ["C1", "C2", "C3", "C4", "C5"]
    assert headless.col_names == wanted, \
        "Expected {0} for column names but got {1}".format(wanted, headless.col_names)

    # List of lists with explicit column names and types.
    frame = H2OFrame.from_python(np.random.randn(100, 4).tolist(),
                                 column_names=list("ABCD"),
                                 column_types=["enum"] * 4)
    frame.head()
    wanted = list("ABCD")
    wanted_types = ["enum"] * 4
    assert frame.col_names == wanted, \
        "Expected {} for column names but got {}".format(wanted, frame.col_names)
    assert list(frame.types.values()) == wanted_types, \
        "Expected {} for column types but got {}".format(wanted_types, frame.types)

    # List of lists with nothing specified.
    frame = H2OFrame(np.random.randn(100, 4).tolist())
    frame.head()
    wanted = ["C1", "C2", "C3", "C4"]
    wanted_types = ["real"] * 4
    assert frame.col_names == wanted, \
        "Expected {} for column names but got {}".format(wanted, frame.col_names)
    assert list(frame.types.values()) == wanted_types, \
        "Expected {} for column types but got {}".format(wanted_types, frame.types)

    # Dict input: the key becomes the column name.
    frame = H2OFrame({'B': ['a', 'a', 'b', 'NA', 'NA']})
    frame.head()
    assert frame.col_names == ["B"], \
        "Expected {} for column names but got {}".format(["B"], frame.col_names)

    # Dict input with explicit column names.
    frame = H2OFrame.from_python({'B': ['a', 'a', 'b', 'NA', 'NA']}, column_names=["X"])
    frame.head()
    assert frame.col_names == ["X"], \
        "Expected {} for column names but got {}".format(["X"], frame.col_names)
def pubdev_5179():
    """Check ``get_frame(..., full_cols=10)`` on a 20x20 frame.

    All 20 columns must be listed, but data is expected for 10 of them only.
    """
    data = [numpy.arange(0, 20).tolist() for _ in range(20)]
    uploaded = h2o.H2OFrame(data)

    # only first 10 columns will be returned with data
    light = H2OFrame.get_frame(uploaded.frame_id, full_cols=10)

    # verify that light frame have all columns
    assert len(light.columns) == 20
    assert len(light.types) == 20
    # But only data for 10 columns is available
    assert len(light._ex._cache._data) == 10
def _as_h2o_frame_from_dataframe(h2oContext, dataframe, frame_name, full_cols=100):
    """Convert a non-empty Spark DataFrame through the Java H2OContext and wrap the result.

    ``full_cols`` is forwarded to ``H2OFrame.from_java_h2o_frame``.

    :raises ValueError: if ``dataframe.count()`` is 0.
    """
    is_empty = dataframe.count() == 0
    if is_empty:
        raise ValueError('Cannot transform empty H2OFrame')

    java_frame = h2oContext._jhc.asH2OFrame(dataframe._jdf, frame_name)
    java_key = java_frame.key()
    return H2OFrame.from_java_h2o_frame(java_frame, java_key, full_cols)
def make_prediction(self, text: str) -> float:
    """
    Predict whether ``text`` is a dad joke.

    0 - not a dad joke
    1 - dad joke
    """
    # One-row, one-column frame named after the expected input column.
    joke_frame = H2OFrame([[text]])
    joke_frame.col_names = ["dad_joke"]

    predictions = self._predict(joke_frame).as_data_frame()
    return predictions['predict'][0]
def test_h2o_frame_2_data_frame_new(self):
    """Convert an H2OFrame loaded from prostate.csv to a Spark frame and compare
    row count, column count and column names."""
    hc = self._hc
    h2o_frame = H2OFrame(file_path="../examples/smalldata/prostate.csv")
    df = hc.as_spark_frame(h2o_frame)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(df.count(), h2o_frame.nrow, "Number of rows should match")
    self.assertEqual(len(df.columns), h2o_frame.ncol, "Number of columns should match")
    self.assertEqual(df.columns, h2o_frame.names, "Column names should match")
def _as_h2o_frame_from_complex_type(h2oContext, dataframe, frame_name, full_cols=-1):
    """Convert an RDD of tuple/list, a list or a pandas.DataFrame and fetch the frame by key.

    The input is first turned into a Spark DataFrame via the context's
    ``_spark_session``. ``full_cols`` is forwarded to ``H2OFrame.get_frame``,
    which is called with ``light=True``.
    """
    # Creates a DataFrame from an RDD of tuple/list, list or pandas.DataFrame.
    # On scala backend, to transform RDD of Product to H2OFrame, we need to know Type Tag.
    # Since there is no alternative for Product class in Python, we first transform the rdd to dataframe
    # and then transform it to H2OFrame.
    spark_df = h2oContext._spark_session.createDataFrame(dataframe)
    frame_key = h2oContext._jhc.asH2OFrameKeyString(spark_df._jdf, frame_name)
    return H2OFrame.get_frame(frame_key, full_cols=full_cols, light=True)
def fit(self, fr):
    """Post this assembly's steps for frame ``fr`` and return the resulting frame.

    :param fr: H2OFrame the steps are applied to (type-checked).
    :return: the H2OFrame named in the response; the assembly name from the
        response is stored in ``self.id``.
    """
    assert_is_type(fr, H2OFrame)

    quoted_steps = []
    for step in self.steps:
        rest = step[1].to_rest(step[0])
        # Double quotes inside a step are swapped for single quotes before quoting.
        quoted_steps.append(quoted(rest.replace('"', "'")))
    payload = {"steps": "[" + ",".join(quoted_steps) + "]", "frame": fr.frame_id}

    response = h2o.api("POST /99/Assembly", data=payload)
    self.id = response["assembly"]["name"]
    return H2OFrame.get_frame(response["result"]["name"])
def train(self, x=None, y=None, training_frame=None, blending_frame=None, **kwargs):
    """Train the model, optionally with a blending frame.

    The blending frame is validated with ``H2OFrame._validate`` (not required)
    and, when present, added to the parameters handed to the parent ``_train``.
    """
    blending_frame = H2OFrame._validate(blending_frame, 'blending_frame', required=False)

    def _add_ensemble_parms(parms):
        # Invoked through ``extend_parms_fn`` with the parameter dict.
        if blending_frame is not None:
            parms['blending_frame'] = blending_frame
        if self.metalearner_fold_column is not None:
            parms['ignored_columns'].remove(quoted(self.metalearner_fold_column))

    super(self.__class__, self)._train(x, y, training_frame,
                                       extend_parms_fn=_add_ensemble_parms,
                                       **kwargs)
def fit(self, fr):
    """
    Perform the munging operations specified in steps on the frame fr.

    :param fr: H2OFrame where munging operations are to be performed on.
    :return: H2OFrame after munging operations are completed.
    """
    assert_is_type(fr, H2OFrame)

    quoted_steps = []
    for step in self.steps:
        rest = step[1].to_rest(step[0])
        # Double quotes inside a step are swapped for single quotes before quoting.
        quoted_steps.append(quoted(rest.replace('"', "'")))
    payload = {"steps": "[" + ",".join(quoted_steps) + "]", "frame": fr.frame_id}

    response = h2o.api("POST /99/Assembly", data=payload)
    self.id = response["assembly"]["name"]
    return H2OFrame.get_frame(response["result"]["name"])
def fit(self, fr, **fit_params):
    """Post this assembly's steps for frame ``fr`` and return the resulting frame.

    Each step is converted with ``to_rest``; double quotes inside the result
    are replaced by single quotes before quoting. The assembly name from the
    response is stored in ``self.id``.
    """
    rest_steps = [step[1].to_rest(step[0]) for step in self.steps]
    quoted_steps = [quoted(rest.replace('"', "'")) for rest in rest_steps]
    payload = {"steps": "[" + ",".join(quoted_steps) + "]", "frame": fr.frame_id}

    response = h2o.api("POST /99/Assembly", data=payload)
    self.id = response["assembly"]["name"]
    return H2OFrame.get_frame(response["result"]["name"])
def rbind(*data):
    """Row-bind the given frames into a single frame expression.

    Every frame must have the same column count, column names and types as the
    first one. The result reuses the first frame's cache and has its cached row
    count set to the sum of all input row counts.

    :param data: frames to bind, in order; the first one is the reference.
    :return: the frame produced by ``H2OFrame._expr`` for the "rbind" expression.
    :raises ValueError: if a frame's column count, names or types differ from
        the first frame's.
    """
    first = data[0]
    total_rows = 0
    for candidate in data:
        if candidate.ncol != first.ncol:
            template = ("Cannot row-bind a dataframe with %d columns to a data frame with %d columns: "
                        "the columns must match")
            raise ValueError(template % (candidate.ncol, first.ncol))
        names_differ = candidate.columns != first.columns
        if names_differ or candidate.types != first.types:
            raise ValueError("Column names and types must match for rbind() to work")
        total_rows += candidate.nrow

    others = data[1:]
    bound = H2OFrame._expr(expr=ExprNode("rbind", first, *others), cache=first._ex._cache)
    bound._ex._cache.nrows = total_rows
    return bound
def _as_h2o_frame_from_RDD_Double(h2oContext, rdd, frame_name):
    """Convert ``rdd`` through the Java H2OContext (double variant) and wrap the result."""
    java_rdd = rdd._to_java_object_rdd()
    java_frame = h2oContext._jhc.asH2OFrameFromPythonRDDDouble(java_rdd, frame_name)
    java_key = java_frame.key()
    return H2OFrame.from_java_h2o_frame(java_frame, java_key)
def aggregated_frame(self):
    """Return the output frame referenced by the model JSON, if any.

    :return: the frame fetched with ``H2OFrame.get_frame`` for the name stored
        under ``output -> output_frame -> name``; ``None`` when the model JSON
        is missing or holds no such name.
    """
    if self._model_json is None:
        return None
    # ``or {}`` also covers keys that are present with a null value, which
    # would otherwise break the chained lookups with an AttributeError.
    output = self._model_json.get("output") or {}
    frame_name = (output.get("output_frame") or {}).get("name")
    if frame_name is None:
        return None
    return H2OFrame.get_frame(frame_name)
def _as_h2o_frame_from_RDD_Long(h2oContext, rdd, framename):
    """Convert ``rdd`` through the Java H2OContext (long variant) and wrap the result."""
    java_rdd = rdd._to_java_object_rdd()
    java_frame = h2oContext._jhc.asH2OFrameFromRDDLong(java_rdd, framename)
    java_key = java_frame.key()
    return H2OFrame.from_java_h2o_frame(java_frame, java_key)
def _as_h2o_frame_from_RDD_String(h2oContext, rdd, frame_name, full_cols=100):
    """Convert ``rdd`` through the Java H2OContext (string variant) and wrap the result.

    ``full_cols`` is forwarded to ``H2OFrame.from_java_h2o_frame``.
    """
    java_rdd = rdd._to_java_object_rdd()
    java_frame = h2oContext._jhc.asH2OFrameFromRDDString(java_rdd, frame_name)
    java_key = java_frame.key()
    return H2OFrame.from_java_h2o_frame(java_frame, java_key, full_cols)
def _as_h2o_frame_from_dataframe(h2oContext, dataframe, framename):
    """Convert a non-empty Spark DataFrame through the Java H2OContext and wrap the result.

    :raises ValueError: if ``dataframe.count()`` is 0.
    """
    is_empty = dataframe.count() == 0
    if is_empty:
        raise ValueError('Cannot transform empty H2OFrame')

    java_frame = h2oContext._jhc.asH2OFrame(dataframe._jdf, framename)
    java_key = java_frame.key()
    return H2OFrame.from_java_h2o_frame(java_frame, java_key)
def _as_h2o_frame_from_dataframe(h2oContext, dataframe, frame_name, full_cols=100):
    """Convert a Spark DataFrame through the Java H2OContext and wrap the result.

    ``full_cols`` is forwarded to ``H2OFrame.from_java_h2o_frame``.
    """
    java_frame = h2oContext._jhc.asH2OFrame(dataframe._jdf, frame_name)
    java_key = java_frame.key()
    return H2OFrame.from_java_h2o_frame(java_frame, java_key, full_cols)
def from_java_h2o_frame(h2o_frame, h2o_frame_id):
    """Fetch the Python H2OFrame for a Java frame and attach the Java reference to it.

    :param h2o_frame: Java-side frame object to cache on the result.
    :param h2o_frame_id: Java-side key; its ``toString()`` value is the frame id.
    :return: the fetched frame with ``_java_frame`` and ``_backed_by_java_obj`` set.
    """
    frame_key = h2o_frame_id.toString()
    py_frame = H2OFrame.get_frame(frame_key)
    py_frame._java_frame = h2o_frame
    py_frame._backed_by_java_obj = True
    return py_frame
def dataframe_2_h2oframe_by_id(dataframe_id):
    """Ask the backend to convert the DataFrame with the given id and fetch the result.

    :param dataframe_id: id of the DataFrame; URL-quoted before being placed in
        the request path.
    :return: the H2OFrame named by ``h2oframe_id`` in the response.
    """
    # ``urllib.quote`` exists only on Python 2; Python 3 moved it to
    # ``urllib.parse``. Resolve it locally so the function works on both.
    try:
        from urllib.parse import quote
    except ImportError:
        from urllib import quote

    endpoint = "dataframes/" + quote(dataframe_id) + "/h2oframe"
    res = h2o.H2OConnection.post(endpoint).json()
    return H2OFrame.get_frame(res["h2oframe_id"])