def test_sw_602_endpoints_equality():
    """Fetch one frame with and without ``light=True`` and compare the cached metadata.

    The frame is built from 99 lists that each hold the integers 0..49999.
    Both retrievals must agree on every compared cache field.
    """
    data = [numpy.arange(0, 50000).tolist() for _ in range(99)]
    fr = h2o.H2OFrame(data)

    full = H2OFrame.get_frame(fr.frame_id)
    light = H2OFrame.get_frame(fr.frame_id, light=True)

    # Compared in the same order as before so the first mismatch still fails first.
    for field in ("_id", "_nrows", "_ncols", "_names", "_data", "_l"):
        assert getattr(full._ex._cache, field) == getattr(light._ex._cache, field)
def test_sw_602_endpoints_equality():
    """Verify that the default and ``light=True`` frame lookups expose equal cache contents."""
    row_count = 99
    data = [numpy.arange(0, 50000).tolist() for _ in range(row_count)]
    fr = h2o.H2OFrame(data)

    full = H2OFrame.get_frame(fr.frame_id)
    light = H2OFrame.get_frame(fr.frame_id, light=True)

    compared_fields = ("_id", "_nrows", "_ncols", "_names", "_data", "_l")
    # ``all`` stops at the first differing field, like a chain of asserts would.
    assert all(
        getattr(full._ex._cache, name) == getattr(light._ex._cache, name)
        for name in compared_fields
    )
def mapping_frame(self):
    """Return the frame named under ``output.mapping_frame.name`` in the model JSON.

    :return: the H2OFrame fetched by that name, or ``None`` when the model
        JSON is missing or carries no mapping frame name.
    """
    model_json = self._model_json
    if model_json is None:
        return None

    frame_name = model_json.get("output", {}).get("mapping_frame", {}).get("name")
    if frame_name is None:
        return None
    return H2OFrame.get_frame(frame_name)
def _as_h2o_frame_from_RDD_Double(h2oContext, rdd, frame_name, full_cols=-1):
    """Create an H2OFrame from ``rdd`` via ``asH2OFrameFromPythonRDDDoubleKeyString``.

    :param h2oContext: context whose ``_jhc`` attribute exposes the converter.
    :param rdd: RDD providing ``_to_java_object_rdd()``.
    :param frame_name: name passed to the converter for the resulting frame.
    :param full_cols: forwarded unchanged to ``H2OFrame.get_frame``.
    :return: the frame fetched (with ``light=True``) under the key the converter returned.
    """
    convert = h2oContext._jhc.asH2OFrameFromPythonRDDDoubleKeyString
    java_rdd = rdd._to_java_object_rdd()
    frame_key = convert(java_rdd, frame_name)
    return H2OFrame.get_frame(frame_key, full_cols=full_cols, light=True)
def baseline_survival_frame(self):
    """Return the frame named under ``output.baseline_survival.name`` in the model JSON.

    :return: the H2OFrame fetched by that name, or ``None`` when the model
        JSON is missing or has no baseline survival frame name.
    """
    model_json = self._model_json
    if model_json is None:
        return None

    survival_name = model_json.get("output", {}).get("baseline_survival", {}).get("name")
    if survival_name is None:
        return None
    return H2OFrame.get_frame(survival_name)
def fit(self, fr, **fit_params):
    """Post the assembly steps together with ``fr`` to ``/99/Assembly`` and return the result frame.

    :param fr: frame whose ``frame_id`` is sent with the request.
    :param fit_params: accepted for call compatibility; not used by this method.
    :return: the H2OFrame named in the ``result`` entry of the response.
    """
    # First render every step, then quote each rendering (double quotes become single).
    rest_calls = [step[1].to_rest(step[0]) for step in self.steps]
    quoted_calls = [quoted(call.replace('"', "'")) for call in rest_calls]
    steps_payload = "[" + ",".join(quoted_calls) + "]"

    response = h2o.api("POST /99/Assembly", data={"steps": steps_payload, "frame": fr.frame_id})
    # Remember the server-side assembly name on this object.
    self.id = response["assembly"]["name"]
    return H2OFrame.get_frame(response["result"]["name"])
def from_java_h2o_frame(h2o_frame, h2o_frame_id):
    """Fetch the Python H2OFrame for a Java-side frame and attach the Java reference to it.

    :param h2o_frame: Java frame object to cache on the returned frame.
    :param h2o_frame_id: object whose ``toString()`` yields the frame key.
    :return: the fetched H2OFrame, flagged as backed by a Java object.
    """
    frame_key = h2o_frame_id.toString()
    py_frame = H2OFrame.get_frame(frame_key)

    # Cache the Java reference to the backend frame alongside its key.
    py_frame._java_frame = h2o_frame
    py_frame._java_frame_sid = frame_key
    py_frame._backed_by_java_obj = True
    return py_frame
def from_java_h2o_frame(h2o_frame, h2o_frame_id, full_cols=100):
    """Fetch a light Python H2OFrame for a Java-side frame and attach the Java reference.

    :param h2o_frame: Java frame object; its ``numCols()`` decides the column limit.
    :param h2o_frame_id: object whose ``toString()`` yields the frame key.
    :param full_cols: column limit used only when the frame has more columns than this.
    :return: the fetched H2OFrame, flagged as backed by a Java object.
    """
    frame_key = h2o_frame_id.toString()

    # Limit the columns only for frames wider than ``full_cols``; otherwise pass -1.
    if h2o_frame.numCols() > full_cols:
        cols_limit = full_cols
    else:
        cols_limit = -1

    py_frame = H2OFrame.get_frame(frame_key, full_cols=cols_limit, light=True)

    # Cache the Java reference to the backend frame alongside its key.
    py_frame._java_frame = h2o_frame
    py_frame._java_frame_sid = frame_key
    py_frame._backed_by_java_obj = True
    return py_frame
def pubdev_5179():
    """Check that ``full_cols=10`` keeps all column metadata but caches data for only 10 columns."""
    size = 20
    data = [numpy.arange(0, size).tolist() for _ in range(size)]
    fr = h2o.H2OFrame(data)

    # Only the first 10 columns are returned with data.
    light = H2OFrame.get_frame(fr.frame_id, full_cols=10)

    # The light frame must still list every column and type ...
    assert len(light.columns) == 20
    assert len(light.types) == 20
    # ... while data is available for 10 columns only.
    assert len(light._ex._cache._data) == 10
def _as_h2o_frame_from_complex_type(h2oContext, dataframe, frame_name, full_cols=-1):
    """Build a Spark DataFrame from ``dataframe`` and convert it to an H2OFrame.

    :param h2oContext: context providing ``_spark_session`` and ``_jhc``.
    :param dataframe: input handed to ``createDataFrame``.
    :param frame_name: name passed to the converter for the resulting frame.
    :param full_cols: forwarded unchanged to ``H2OFrame.get_frame``.
    :return: the frame fetched (with ``light=True``) under the returned key.
    """
    # The input is an RDD of tuples/lists, a list, or a pandas.DataFrame.
    # The Scala backend needs a Type Tag to turn an RDD of Product into an
    # H2OFrame, and Python has no counterpart of the Product class. The data
    # is therefore converted to a Spark DataFrame first and to an H2OFrame second.
    spark_df = h2oContext._spark_session.createDataFrame(dataframe)
    frame_key = h2oContext._jhc.asH2OFrameKeyString(spark_df._jdf, frame_name)
    return H2OFrame.get_frame(frame_key, full_cols=full_cols, light=True)
def fit(self, fr, **fit_params):
    """Send the rendered steps and ``fr`` to ``/99/Assembly``; return the produced frame.

    :param fr: frame whose ``frame_id`` is sent with the request.
    :param fit_params: accepted for call compatibility; not used by this method.
    :return: the H2OFrame named in the ``result`` entry of the response.
    """
    rendered = [step[1].to_rest(step[0]) for step in self.steps]
    # Double quotes inside each rendering are swapped for single quotes before quoting.
    steps_arg = "[%s]" % ",".join([quoted(text.replace('"', "'")) for text in rendered])

    request_data = {"steps": steps_arg, "frame": fr.frame_id}
    reply = h2o.api("POST /99/Assembly", data=request_data)

    self.id = reply["assembly"]["name"]
    result_name = reply["result"]["name"]
    return H2OFrame.get_frame(result_name)
def fit(self, fr):
    """
    Apply the munging operations listed in ``self.steps`` to the frame ``fr``.

    :param fr: H2OFrame on which the munging operations are performed.
    :return: H2OFrame produced once the munging operations have completed.
    """
    assert_is_type(fr, H2OFrame)

    encoded_steps = []
    for step in self.steps:
        rest_repr = step[1].to_rest(step[0])
        # Double quotes are swapped for single quotes before the step is quoted.
        encoded_steps.append(quoted(rest_repr.replace('"', "'")))

    payload = {"steps": "[" + ",".join(encoded_steps) + "]", "frame": fr.frame_id}
    response = h2o.api("POST /99/Assembly", data=payload)

    self.id = response["assembly"]["name"]
    return H2OFrame.get_frame(response["result"]["name"])
def fit(self, fr):
    """Run the assembly's steps against ``fr`` on the server and return the resulting frame.

    :param fr: H2OFrame whose ``frame_id`` is posted with the steps.
    :return: the H2OFrame named in the ``result`` entry of the response.
    """
    assert_is_type(fr, H2OFrame)

    def _encode(step):
        # Render the step, swap double quotes for single quotes, then quote it.
        return quoted(step[1].to_rest(step[0]).replace('"', "'"))

    steps_arg = "[" + ",".join(_encode(step) for step in self.steps) + "]"
    reply = h2o.api("POST /99/Assembly", data={"steps": steps_arg, "frame": fr.frame_id})

    self.id = reply["assembly"]["name"]
    return H2OFrame.get_frame(reply["result"]["name"])
def pubdev_5179():
    """Verify a frame fetched with ``full_cols=10`` reports 20 columns but holds data for 10."""
    expected_columns = 20
    columns_with_data = 10

    data = [numpy.arange(0, 20).tolist() for _ in range(20)]
    fr = h2o.H2OFrame(data)

    # Only the first 10 columns come back with data.
    light = H2OFrame.get_frame(fr.frame_id, full_cols=10)

    # Column names and types must cover the whole frame.
    assert len(light.columns) == expected_columns
    assert len(light.types) == expected_columns
    # Cached data is present for 10 columns only.
    assert len(light._ex._cache._data) == columns_with_data
def fit(self, fr):
    """
    Perform the munging operations defined by the assembly steps on the frame ``fr``.

    :param fr: H2OFrame the munging operations are applied to.
    :return: H2OFrame obtained after the munging operations are completed.
    """
    assert_is_type(fr, H2OFrame)

    # Each step is rendered, its double quotes swapped for single quotes, then quoted.
    step_strings = [
        quoted(step[1].to_rest(step[0]).replace('"', "'"))
        for step in self.steps
    ]
    steps_arg = "[{}]".format(",".join(step_strings))

    request_data = {"steps": steps_arg, "frame": fr.frame_id}
    reply = h2o.api("POST /99/Assembly", data=request_data)

    self.id = reply["assembly"]["name"]
    result_name = reply["result"]["name"]
    return H2OFrame.get_frame(result_name)
def asH2OFrame(self, sparkFrame, h2oFrameName=None, fullCols=-1):
    """
    Transform the given Spark RDD or DataFrame into an H2OFrame.

    Parameters
    ----------
    sparkFrame : Spark RDD or DataFrame to convert
    h2oFrameName : Optional name for the resulting H2OFrame
    fullCols : number of first n columns which are sent to the client together with the data

    Returns
    -------
    H2OFrame which contains the data of the original input Spark data structure
    """
    assert_is_type(sparkFrame, DataFrame, RDD)
    prepared = H2OContext.__prepareSparkDataForConversion(sparkFrame)

    # The frame name is passed on only when the caller supplied one.
    if h2oFrameName is not None:
        frame_key = self._jhc.asH2OFrameKeyString(prepared._jdf, h2oFrameName)
    else:
        frame_key = self._jhc.asH2OFrameKeyString(prepared._jdf)

    return H2OFrame.get_frame(frame_key, full_cols=fullCols, light=True)
def fit(self, fr):
    """
    Perform the munging operations specified in the assembly steps on the frame ``fr``.

    :param fr: H2OFrame on which the munging operations are performed.
    :return: H2OFrame after the munging operations are completed.

    :examples:

    >>> iris = h2o.load_dataset("iris")
    >>> assembly = H2OAssembly(steps=[("col_select",
    ...                                H2OColSelect(["Sepal.Length",
    ...                                "Petal.Length", "Species"])),
    ...                               ("cos_Sepal.Length",
    ...                                H2OColOp(op=H2OFrame.cos,
    ...                                col="Sepal.Length",
    ...                                inplace=True)),
    ...                               ("str_cnt_Species",
    ...                                H2OColOp(op=H2OFrame.countmatches,
    ...                                col="Species",
    ...                                inplace=False,
    ...                                pattern="s"))])
    >>> fit = assembly.fit(iris)
    >>> fit
    """
    assert_is_type(fr, H2OFrame)

    encoded_steps = []
    for step in self.steps:
        # Render the step, swap double quotes for single quotes, then quote it.
        rendered = step[1].to_rest(step[0]).replace('"', "'")
        encoded_steps.append(quoted(rendered))
    steps_arg = "[" + ",".join(encoded_steps) + "]"

    reply = h2o.api("POST /99/Assembly", data={"steps": steps_arg, "frame": fr.frame_id})
    self.id = reply["assembly"]["name"]
    return H2OFrame.get_frame(reply["result"]["name"])
def from_java_h2o_frame(h2o_frame, h2o_frame_id):
    """Fetch the Python H2OFrame keyed by ``h2o_frame_id`` and attach the Java frame to it.

    :param h2o_frame: Java frame object stored on the returned frame.
    :param h2o_frame_id: object whose ``toString()`` yields the frame key.
    :return: the fetched H2OFrame, flagged as backed by a Java object.
    """
    frame_key = h2o_frame_id.toString()
    py_frame = H2OFrame.get_frame(frame_key)
    py_frame._java_frame = h2o_frame
    py_frame._backed_by_java_obj = True
    return py_frame
def frame_in_cluster(frame):
    """Tell whether ``frame`` has a key and can still be fetched by that key.

    :param frame: object exposing a ``key`` attribute.
    :return: ``False`` when the key is ``None``; otherwise whether
        ``H2OFrame.get_frame`` returned something other than ``None``.
    """
    if frame.key is None:
        return False
    # Reload just the first row of the frame to verify that no vec has been removed.
    reloaded = H2OFrame.get_frame(frame.key, rows=1)
    return reloaded is not None
def dataframe_2_h2oframe_by_id(dataframe_id):
    """Ask the server to convert the given dataframe to an H2OFrame and return that frame.

    Posts to ``dataframes/<dataframe_id>/h2oframe`` and fetches the frame
    named by the ``h2oframe_id`` entry of the JSON response.

    :param dataframe_id: identifier of the dataframe; percent-encoded before
        being placed in the request path.
    :return: the H2OFrame fetched by the returned id.
    """
    # ``urllib.quote`` exists only on Python 2; Python 3 moved it to
    # ``urllib.parse``. Importing locally keeps the function working on both.
    try:
        from urllib.parse import quote
    except ImportError:  # Python 2
        from urllib import quote

    endpoint = "dataframes/" + quote(dataframe_id) + "/h2oframe"
    res = h2o.H2OConnection.post(endpoint).json()
    return H2OFrame.get_frame(res["h2oframe_id"])
def aggregated_frame(self):
    """Return the frame named under ``output.output_frame.name`` in the model JSON.

    :return: the H2OFrame fetched by that name, or ``None`` when the model
        JSON is missing or has no output frame name.
    """
    model_json = self._model_json
    if model_json is None:
        return None

    out_frame_name = model_json.get("output", {}).get("output_frame", {}).get("name")
    if out_frame_name is None:
        return None
    return H2OFrame.get_frame(out_frame_name)
def _as_h2o_frame_from_dataframe(h2oContext, dataframe, frame_name, full_cols=-1):
    """Convert a Spark DataFrame to an H2OFrame via ``asH2OFrameKeyString``.

    :param h2oContext: context whose ``_jhc`` attribute exposes the converter.
    :param dataframe: DataFrame whose ``_jdf`` is handed to the converter.
    :param frame_name: name passed to the converter for the resulting frame.
    :param full_cols: forwarded unchanged to ``H2OFrame.get_frame``.
    :return: the frame fetched (with ``light=True``) under the returned key.
    """
    convert = h2oContext._jhc.asH2OFrameKeyString
    frame_key = convert(dataframe._jdf, frame_name)
    return H2OFrame.get_frame(frame_key, full_cols=full_cols, light=True)