def col_names_check():
    """Assert that H2OFrames report the expected column names and types.

    The checks run in this order: the two iris CSV files loaded through
    ``h2o.import_file``, a random 100x4 list-of-lists uploaded with explicit
    names and ``enum`` types, the same kind of data uploaded without names,
    and a one-column dict uploaded with and without a ``column_names``
    override. Nothing is returned; a failed check raises ``AssertionError``.
    """
    # --- frames imported from CSV files ---
    headed = h2o.import_file(pyunit_utils.locate("smalldata/iris/iris_wheader.csv"))
    wanted = ["sepal_len", "sepal_wid", "petal_len", "petal_wid", "class"]
    assert headed.col_names == wanted, \
        "Expected {0} for column names but got {1}".format(wanted, headed.col_names)

    plain = h2o.import_file(pyunit_utils.locate("smalldata/iris/iris.csv"))
    wanted = ["C1", "C2", "C3", "C4", "C5"]
    assert plain.col_names == wanted, \
        "Expected {0} for column names but got {1}".format(wanted, plain.col_names)

    # --- random matrix uploaded with explicit names and enum types ---
    matrix = np.random.randn(100, 4).tolist()
    frame = H2OFrame.from_python(matrix,
                                 column_names=list("ABCD"),
                                 column_types=["enum"] * 4)
    frame.head()  # NOTE(review): presumably forces the frame to be evaluated -- confirm
    wanted = list("ABCD")
    wanted_types = ["enum"] * 4
    assert frame.col_names == wanted, \
        "Expected {} for column names but got {}".format(wanted, frame.col_names)
    assert list(frame.types.values()) == wanted_types, \
        "Expected {} for column types but got {}".format(wanted_types, frame.types)

    # --- random matrix uploaded with no names: C1..C4 and "real" are expected ---
    matrix = np.random.randn(100, 4).tolist()
    frame = H2OFrame(matrix)
    frame.head()
    wanted = ["C1", "C2", "C3", "C4"]
    wanted_types = ["real"] * 4
    assert frame.col_names == wanted, \
        "Expected {} for column names but got {}".format(wanted, frame.col_names)
    assert list(frame.types.values()) == wanted_types, \
        "Expected {} for column types but got {}".format(wanted_types, frame.types)

    # --- dict input: the key is expected to become the column name ---
    frame = H2OFrame({'B': ['a', 'a', 'b', 'NA', 'NA']})
    frame.head()
    assert frame.col_names == ["B"], \
        "Expected {} for column names but got {}".format(["B"], frame.col_names)

    # --- dict input with column_names: the override is expected to win ---
    frame = H2OFrame.from_python({'B': ['a', 'a', 'b', 'NA', 'NA']}, column_names=["X"])
    frame.head()
    assert frame.col_names == ["X"], \
        "Expected {} for column names but got {}".format(["X"], frame.col_names)
def from_pandas(X):
    """Convert a pandas dataframe into an H2OFrame, keeping its column names.

    Thin wrapper around ``H2OFrame.from_python`` that forwards the
    dataframe's own column labels as ``column_names`` and picks the
    ``header`` argument from the installed h2o version.

    Parameters
    ----------
    X : pd.DataFrame
        The dataframe to convert.

    Returns
    -------
    H2OFrame
    """
    # Only the call matters here; neither returned value is used below.
    # NOTE(review): presumably this rejects non-DataFrame input -- confirm
    # against validate_is_pd.
    _checked, _ignored = validate_is_pd(X, None)

    # h2o releases older than 3.10.0.7 get header=1, everything else header=0.
    is_legacy_h2o = parse_version(h2o.__version__) < parse_version('3.10.0.7')
    header_flag = 1 if is_legacy_h2o else 0

    # No check that h2o has been started: a failure here simply propagates.
    return H2OFrame.from_python(X, header=header_flag, column_names=X.columns.tolist())
def transform(self, column):
    """Encode ``column`` with the fitted encoder and return a new H2OFrame.

    ``column`` is converted with ``h2o_col_to_numpy``, passed through
    ``self.encoder_.transform``, reshaped to one column with as many rows
    as the converted input, and uploaded via ``H2OFrame.from_python``.

    NOTE(review): ``check_is_fitted`` presumably raises when ``encoder_``
    has not been set by a prior fit -- confirm against its definition.
    """
    check_is_fitted(self, 'encoder_')

    values = h2o_col_to_numpy(column)
    encoded = self.encoder_.transform(values)

    # Shape the result as (n_rows, 1) before it goes back to h2o.
    single_column = encoded.reshape(values.shape[0], 1)

    # The encoded data has to be uploaded again as a fresh frame (for now).
    return H2OFrame.from_python(single_column)
def col_names_check():
    """Verify column names (and types, for generated data) of several H2OFrames.

    Frames come from three sources: the iris CSV files read with
    ``h2o.import_file``, random 100x4 nested lists, and a small dict with a
    single key. Each frame's ``col_names`` is compared with the expected
    list; the random frames also have their ``types`` values compared.
    Returns nothing and raises ``AssertionError`` on the first mismatch.
    """

    def _random_rows():
        # A fresh 100x4 list-of-lists of normally distributed floats.
        return np.random.randn(100, 4).tolist()

    # CSV import: names expected from the file.
    iris_named = h2o.import_file(
        pyunit_utils.locate("smalldata/iris/iris_wheader.csv"))
    names = ["sepal_len", "sepal_wid", "petal_len", "petal_wid", "class"]
    assert iris_named.col_names == names, \
        "Expected {0} for column names but got {1}".format(names, iris_named.col_names)

    # CSV import: generated names C1..C5 expected.
    iris_unnamed = h2o.import_file(pyunit_utils.locate("smalldata/iris/iris.csv"))
    names = ["C1", "C2", "C3", "C4", "C5"]
    assert iris_unnamed.col_names == names, \
        "Expected {0} for column names but got {1}".format(names, iris_unnamed.col_names)

    # Nested list with explicit names and enum types.
    uploaded = H2OFrame.from_python(_random_rows(),
                                    column_names=list("ABCD"),
                                    column_types=["enum"] * 4)
    uploaded.head()
    names = list("ABCD")
    assert uploaded.col_names == names, \
        "Expected {} for column names but got {}".format(names, uploaded.col_names)
    assert list(uploaded.types.values()) == ["enum"] * 4, \
        "Expected {} for column types but got {}".format(["enum"] * 4, uploaded.types)

    # Nested list with nothing specified: C1..C4 and "real" are expected.
    uploaded = H2OFrame(_random_rows())
    uploaded.head()
    names = ["C1", "C2", "C3", "C4"]
    assert uploaded.col_names == names, \
        "Expected {} for column names but got {}".format(names, uploaded.col_names)
    assert list(uploaded.types.values()) == ["real"] * 4, \
        "Expected {} for column types but got {}".format(["real"] * 4, uploaded.types)

    # Dict input through the constructor: the key "B" is the expected name.
    uploaded = H2OFrame({'B': ['a', 'a', 'b', 'NA', 'NA']})
    uploaded.head()
    assert uploaded.col_names == ["B"], \
        "Expected {} for column names but got {}".format(["B"], uploaded.col_names)

    # Dict input through from_python with column_names: "X" is the expected name.
    uploaded = H2OFrame.from_python({'B': ['a', 'a', 'b', 'NA', 'NA']},
                                    column_names=["X"])
    uploaded.head()
    assert uploaded.col_names == ["X"], \
        "Expected {} for column names but got {}".format(["X"], uploaded.col_names)