def as_spark_frame(self, h2o_frame, copy_metadata=True):
    """
    Convert an H2OFrame into a Spark DataFrame.

    Parameters
    ----------
    h2o_frame : H2OFrame
        Frame to convert. Any other type is ignored.
    copy_metadata : bool, default True
        Forwarded unchanged to the JVM-side ``asDataFrame`` call.

    Returns
    -------
    Spark DataFrame wrapping the converted frame, or ``None`` when
    ``h2o_frame`` is not an ``H2OFrame``.
    """
    # Guard clause: inputs that are not H2OFrames yield None.
    if not isinstance(h2o_frame, H2OFrame):
        return None

    java_frame = h2o_frame.get_java_h2o_frame()
    java_df = self._jhc.asDataFrame(java_frame, copy_metadata, self._jsql_context)
    spark_df = DataFrame(java_df, self._sql_context)

    # Keep a reference to the H2OFrame on the returned DataFrame. Otherwise
    # Python could garbage collect the frame as soon as this method returns
    # (nothing else references it), and any later Spark action on the
    # DataFrame would fail because the underlying frame would be missing.
    spark_df._h2o_frame = h2o_frame
    return spark_df
def asSparkFrame(self, h2oFrame, copyMetadata=True):
    """
    Convert an H2OFrame into a Spark DataFrame.

    Parameters
    ----------
    h2oFrame : H2OFrame
        Frame to convert. Any other type is ignored.
    copyMetadata : bool, default True
        Forwarded unchanged to the JVM-side ``asDataFrame`` call.

    Returns
    -------
    Spark DataFrame wrapping the converted frame, or ``None`` when
    ``h2oFrame`` is not an ``H2OFrame``.
    """
    # Guard clause: inputs that are not H2OFrames yield None.
    if not isinstance(h2oFrame, H2OFrame):
        return None

    # The JVM side is given the frame's id rather than a Java frame object.
    key = h2oFrame.frame_id
    java_df = self._jhc.asDataFrame(key, copyMetadata)

    # Wrap the Java DataFrame using the active (or newly created) session.
    sql_context = SparkSession.builder.getOrCreate()._wrapped
    spark_df = DataFrame(java_df, sql_context)

    # Keep a reference to the H2OFrame on the returned DataFrame. Otherwise
    # Python could garbage collect the frame as soon as this method returns
    # (nothing else references it), and any later Spark action on the
    # DataFrame would fail because the underlying frame would be missing.
    spark_df._h2o_frame = h2oFrame
    return spark_df
def as_spark_frame(self, h2o_frame, copy_metadata=True):
    """
    Convert an H2OFrame into a Spark DataFrame.

    Parameters
    ----------
    h2o_frame : H2OFrame
        Frame to convert. Any other type is ignored.
    copy_metadata : bool, default True
        Forwarded unchanged to the JVM-side ``asDataFrame`` call.

    Returns
    -------
    Spark DataFrame wrapping the converted frame, or ``None`` when
    ``h2o_frame`` is not an ``H2OFrame``.
    """
    # Guard clause: inputs that are not H2OFrames yield None.
    if not isinstance(h2o_frame, H2OFrame):
        return None

    java_frame = h2o_frame.get_java_h2o_frame()
    java_df = self._jhc.asDataFrame(java_frame, copy_metadata)
    spark_df = DataFrame(java_df, self._sql_context)

    # Keep a reference to the H2OFrame on the returned DataFrame. Otherwise
    # Python could garbage collect the frame as soon as this method returns
    # (nothing else references it), and any later Spark action on the
    # DataFrame would fail because the underlying frame would be missing.
    spark_df._h2o_frame = h2o_frame
    return spark_df