Beispiel #1
0
def col_names_check():
    """Assert the column names (and, in two cases, column types) of H2O frames.

    Frames are obtained from two imported CSV files and from Python lists
    and dicts, with and without explicit ``column_names``.
    """
    # Imported file whose expected names are the iris feature labels.
    headed_path = pyunit_utils.locate("smalldata/iris/iris_wheader.csv")
    headed_frame = h2o.import_file(headed_path)
    want = ["sepal_len", "sepal_wid", "petal_len", "petal_wid", "class"]
    got = headed_frame.col_names
    assert got == want, \
        "Expected {0} for column names but got {1}".format(want, got)

    # Imported file that is expected to end up with the names C1..C5.
    plain_path = pyunit_utils.locate("smalldata/iris/iris.csv")
    plain_frame = h2o.import_file(plain_path)
    want = ["C1", "C2", "C3", "C4", "C5"]
    got = plain_frame.col_names
    assert got == want, \
        "Expected {0} for column names but got {1}".format(want, got)

    # List of lists with explicit names and explicit "enum" types.
    rows = np.random.randn(100, 4).tolist()
    frame = H2OFrame.from_python(rows, column_names=list("ABCD"), column_types=["enum"] * 4)
    frame.head()
    want = list("ABCD")
    got = frame.col_names
    assert got == want, \
        "Expected {} for column names but got {}".format(want, got)
    frame_types = frame.types
    assert list(frame_types.values()) == ["enum"] * 4, \
        "Expected {} for column types but got {}".format(["enum"] * 4, frame_types)

    # List of lists with nothing specified: names C1..C4, all "real".
    rows = np.random.randn(100, 4).tolist()
    frame = H2OFrame(rows)
    frame.head()
    want = ["C1", "C2", "C3", "C4"]
    got = frame.col_names
    assert got == want, \
        "Expected {} for column names but got {}".format(want, got)
    frame_types = frame.types
    assert list(frame_types.values()) == ["real"] * 4, \
        "Expected {} for column types but got {}".format(["real"] * 4, frame_types)

    # Dict input: the dict key is expected as the column name.
    frame = H2OFrame({'B': ['a', 'a', 'b', 'NA', 'NA']})
    frame.head()
    got = frame.col_names
    assert got == ["B"], "Expected {} for column names but got {}".format(["B"], got)

    # Dict input with explicit column_names: "X" is expected, not the key.
    frame = H2OFrame.from_python({'B': ['a', 'a', 'b', 'NA', 'NA']}, column_names=["X"])
    frame.head()
    got = frame.col_names
    assert got == ["X"], "Expected {} for column names but got {}".format(["X"], got)
Beispiel #2
0
def from_pandas(X):
    """Convert a pandas dataframe into an H2OFrame, keeping its column names.

    Thin wrapper around ``H2OFrame.from_python``: only ``header`` and
    ``column_names`` are passed explicitly, every other argument is left
    at its default.

    Parameters
    ----------

    X : pd.DataFrame
        The dataframe to convert.

    Returns
    -------

    H2OFrame
        Whatever ``H2OFrame.from_python`` returns for ``X``.
    """
    # The returned values are not used; presumably this call is kept for
    # its validation of X (confirm against validate_is_pd).
    _, _ = validate_is_pd(X, None)

    # h2o releases before 3.10.0.7 get header=1, newer ones header=0
    legacy_h2o = parse_version(h2o.__version__) < parse_version('3.10.0.7')
    header_flag = 1 if legacy_h2o else 0

    # no check that h2o is running is made here; any failure propagates
    return H2OFrame.from_python(X, header=header_flag, column_names=X.columns.tolist())
Beispiel #3
0
def from_pandas(X):
    """Build an H2OFrame from a pandas dataframe with named columns.

    Delegates to ``H2OFrame.from_python`` and relies on its defaults,
    supplying only the ``header`` flag and the dataframe's column names.

    Parameters
    ----------

    X : pd.DataFrame
        The dataframe to convert.

    Returns
    -------

    H2OFrame
        The frame produced by ``H2OFrame.from_python``.
    """
    # Result is discarded; the call is presumably made only to validate X
    # (TODO confirm what validate_is_pd checks).
    _unused_frame, _unused_cols = validate_is_pd(X, None)

    # header is 1 for h2o versions older than 3.10.0.7 and 0 otherwise
    header = int(parse_version(h2o.__version__) < parse_version('3.10.0.7'))
    names = X.columns.tolist()

    # if h2o has not been started this call is simply allowed to fail
    return H2OFrame.from_python(X, header=header, column_names=names)
Beispiel #4
0
    def transform(self, column):
        """Apply the fitted encoder to ``column`` and return a new H2OFrame.

        Parameters
        ----------

        column : object accepted by ``h2o_col_to_numpy``
            The column to encode (presumably a single-column H2OFrame;
            confirm against ``h2o_col_to_numpy``).

        Returns
        -------

        H2OFrame
            The encoded values, reshaped to one column with as many rows
            as the converted input, passed through ``H2OFrame.from_python``.
        """
        check_is_fitted(self, 'encoder_')
        values = h2o_col_to_numpy(column)

        # encode first, then force an (n_rows, 1) layout
        encoded = self.encoder_.transform(values)
        n_rows = values.shape[0]
        single_col = encoded.reshape(n_rows, 1)

        # the encoded data has to be uploaded again as a fresh frame (for now)
        return H2OFrame.from_python(single_col)
Beispiel #5
0
def col_names_check():
    """Verify column names (and some column types) reported by H2O frames.

    Exercises frames created by ``h2o.import_file`` on two iris CSV files,
    by ``H2OFrame`` / ``H2OFrame.from_python`` on random numeric rows, and
    on a small dict, each time asserting ``col_names`` (and ``types`` for
    the numeric-row frames).
    """
    # --- imported CSV files -------------------------------------------
    wheader_csv = pyunit_utils.locate("smalldata/iris/iris_wheader.csv")
    wheader_frame = h2o.import_file(wheader_csv)
    names = ["sepal_len", "sepal_wid", "petal_len", "petal_wid", "class"]
    assert wheader_frame.col_names == names, \
        "Expected {0} for column names but got {1}".format(names, wheader_frame.col_names)

    iris_csv = pyunit_utils.locate("smalldata/iris/iris.csv")
    iris_frame = h2o.import_file(iris_csv)
    names = ["C1", "C2", "C3", "C4", "C5"]
    assert iris_frame.col_names == names, \
        "Expected {0} for column names but got {1}".format(names, iris_frame.col_names)

    # --- random rows, explicit names and "enum" types -----------------
    enum_types = ["enum"] * 4
    data = np.random.randn(100, 4).tolist()
    frame = H2OFrame.from_python(
        data,
        column_names=list("ABCD"),
        column_types=["enum"] * 4,
    )
    frame.head()
    names = list("ABCD")
    assert frame.col_names == names, \
        "Expected {} for column names but got {}".format(names, frame.col_names)
    assert list(frame.types.values()) == enum_types, \
        "Expected {} for column types but got {}".format(enum_types, frame.types)

    # --- random rows, nothing specified: C1..C4, all "real" -----------
    real_types = ["real"] * 4
    data = np.random.randn(100, 4).tolist()
    frame = H2OFrame(data)
    frame.head()
    names = ["C1", "C2", "C3", "C4"]
    assert frame.col_names == names, \
        "Expected {} for column names but got {}".format(names, frame.col_names)
    assert list(frame.types.values()) == real_types, \
        "Expected {} for column types but got {}".format(real_types, frame.types)

    # --- dict input: name expected to come from the dict key ----------
    frame = H2OFrame({'B': ['a', 'a', 'b', 'NA', 'NA']})
    frame.head()
    assert frame.col_names == ["B"], \
        "Expected {} for column names but got {}".format(["B"], frame.col_names)

    # --- dict input with column_names: "X" expected instead of "B" ----
    frame = H2OFrame.from_python(
        {'B': ['a', 'a', 'b', 'NA', 'NA']},
        column_names=["X"],
    )
    frame.head()
    assert frame.col_names == ["X"], \
        "Expected {} for column names but got {}".format(["X"], frame.col_names)