コード例 #1
0
def interaction_check():
    """
    Verify h2o.interaction() pairwise output on the iris data set.

    The same three factor columns are selected twice, first by column index
    and then by column name, and both results must have the same dimensions
    and the same factor levels.
    """
    # Load the iris data into an H2O frame
    iris = h2o.import_file(path=pyunit_utils.locate("smalldata/iris/iris.csv"))

    # Append two derived factor columns: "is setosa" (C6) and "is virginica" (C7)
    iris = iris.cbind(iris[4] == "Iris-setosa")
    iris[5] = iris[5].asfactor()
    iris.set_name(5, "C6")

    iris = iris.cbind(iris[4] == "Iris-virginica")
    iris[6] = iris[6].asfactor()
    iris.set_name(6, name="C7")

    dims_msg = ("Expected 150 rows and 3 columns, but got {0} rows and {1} "
                "columns")
    levels_msg = "Expected the following levels {0}, but got {1}"

    # Expected levels of the three pairwise interaction columns, in column order
    expected_levels = (
        ["Iris-setosa_1", "Iris-versicolor_0", "Iris-virginica_0"],
        ["Iris-setosa_0", "Iris-versicolor_0", "Iris-virginica_1"],
        ["0_0", "1_0", "0_1"],
    )

    # First pass picks the factors by position, second pass by column name
    for factor_spec in ([4, 5, 6], ["C5", "C6", "C7"]):
        interactions = h2o.interaction(iris,
                                       factors=factor_spec,
                                       pairwise=True,
                                       max_factors=10000,
                                       min_occurrence=1)
        assert interactions.nrow == 150 and interactions.ncol == 3, \
            dims_msg.format(interactions.nrow, interactions.ncol)

        # Fetch the levels of every column before checking any of them
        observed_levels = [interactions[col].levels() for col in range(3)]
        for wanted, got in zip(expected_levels, observed_levels):
            assert got == wanted, levels_msg.format(wanted, got)
コード例 #2
0
def interaction_check():
    """
    Verify h2o.interaction() pairwise output on the iris data set.

    The interaction frame is built twice from the same three factor columns,
    once addressed by index and once by name, and each result is checked for
    its dimensions and its factor levels.
    """
    # NOTE(review): h2o.locate and setName look like older h2o client
    # spellings -- confirm against the h2o version this test targets.
    iris = h2o.import_file(path=h2o.locate("smalldata/iris/iris.csv"))

    # Append two derived factor columns: "is setosa" (C6) and "is virginica" (C7)
    iris = iris.cbind(iris[4] == "Iris-setosa")
    iris[5] = iris[5].asfactor()
    iris.setName(5, "C6")

    iris = iris.cbind(iris[4] == "Iris-virginica")
    iris[6] = iris[6].asfactor()
    iris.setName(6, name="C7")

    def _assert_pairwise_result(frame):
        # Check the shape and the per-column levels of one interaction frame.
        assert frame.nrow == 150 and frame.ncol == 3, \
            ("Expected 150 rows and 3 columns, but got {0} rows and {1} "
             "columns").format(frame.nrow, frame.ncol)

        first = frame[0].levels()
        second = frame[1].levels()
        third = frame[2].levels()

        template = "Expected the following levels {0}, but got {1}"
        checks = (
            (first, ["Iris-setosa_1", "Iris-versicolor_0", "Iris-virginica_0"]),
            (second, ["Iris-setosa_0", "Iris-versicolor_0", "Iris-virginica_1"]),
            (third, ["0_0", "1_0", "0_1"]),
        )
        for actual, expected in checks:
            assert actual == expected, template.format(expected, actual)

    # Factors given as column indices
    _assert_pairwise_result(
        h2o.interaction(iris, factors=[4, 5, 6], pairwise=True,
                        max_factors=10000, min_occurrence=1))

    # Same request, with the factors given as column names
    _assert_pairwise_result(
        h2o.interaction(iris, factors=["C5", "C6", "C7"], pairwise=True,
                        max_factors=10000, min_occurrence=1))
コード例 #3
0
ファイル: frame.py プロジェクト: Vishnu24/h2o-3
  def interaction(self, factors, pairwise, max_factors, min_occurrence, destination_frame=None):
    """
    Create categorical interaction features from columns of this frame.

    Delegates to h2o.interaction(), passing this frame as the ``data`` argument, and returns
    whatever that call returns.

    :param factors: Factor columns to combine, given either as indices or as column names.
    :param pairwise: Whether to create pairwise interactions between the factors (otherwise one
      higher-order interaction is created). Only applicable if there are 3 or more factors.
    :param max_factors: Max. number of factor levels in pair-wise interaction terms (if enforced,
      one extra catch-all factor will be made).
    :param min_occurrence: Min. occurrence threshold for factor levels in pair-wise interaction terms.
    :param destination_frame: A string indicating the destination key. If empty, this will be
      auto-generated by H2O.
    :return: H2OFrame
    """
    interaction_frame = h2o.interaction(
      data=self,
      factors=factors,
      pairwise=pairwise,
      max_factors=max_factors,
      min_occurrence=min_occurrence,
      destination_frame=destination_frame,
    )
    return interaction_frame
コード例 #4
0
ファイル: pyunit_h2ointeraction.py プロジェクト: ysj89/h2o-3
def h2ointeraction():
    """
    Python API test: h2o.interaction(data, factors, pairwise, max_factors, min_occurrence, destination_frame=None)

    Copied from pyunit_interaction.py

    Builds the pairwise interactions of three factor columns of the iris data
    set and checks the result's type, dimensions and factor levels.

    Failures are deliberately left to propagate: each assertion carries a
    message naming the expected and actual values, and wrapping the test in a
    catch-all handler would replace that detail with a generic message.
    """
    iris = h2o.import_file(
        path=pyunit_utils.locate("smalldata/iris/iris.csv"))

    # add a couple of factor columns to iris
    iris = iris.cbind(iris[4] == "Iris-setosa")
    iris[5] = iris[5].asfactor()
    iris.set_name(5, "C6")

    iris = iris.cbind(iris[4] == "Iris-virginica")
    iris[6] = iris[6].asfactor()
    iris.set_name(6, name="C7")

    # create a frame of the two-way interactions
    two_way_interactions = h2o.interaction(iris,
                                           factors=[4, 5, 6],
                                           pairwise=True,
                                           max_factors=10000,
                                           min_occurrence=1)
    assert_is_type(two_way_interactions, H2OFrame)
    assert two_way_interactions.nrow == 150 and two_way_interactions.ncol == 3, \
        "Expected 150 rows and 3 columns, but got {0} rows and {1} " \
        "columns".format(two_way_interactions.nrow, two_way_interactions.ncol)

    # Fetch the per-column levels once and index into the result
    all_levels = two_way_interactions.levels()
    expected_levels = [
        ["Iris-setosa_1", "Iris-versicolor_0", "Iris-virginica_0"],
        ["Iris-setosa_0", "Iris-versicolor_0", "Iris-virginica_1"],
        ["0_0", "1_0", "0_1"],
    ]
    for col, expected in enumerate(expected_levels):
        actual = all_levels[col]
        assert actual == expected, \
            "Expected the following levels {0}, but got {1}".format(expected, actual)
コード例 #5
0
def h2ointeraction():
    """
    Python API test for
    h2o.interaction(data, factors, pairwise, max_factors, min_occurrence, destination_frame=None).

    Copied from pyunit_interaction.py. Builds the pairwise interactions of
    three factor columns of the iris data set and checks the type, shape and
    factor levels of the resulting frame.
    """
    iris = h2o.import_file(path=pyunit_utils.locate("smalldata/iris/iris.csv"))

    # Append two derived factor columns: "is setosa" (C6) and "is virginica" (C7)
    iris = iris.cbind(iris[4] == "Iris-setosa")
    iris[5] = iris[5].asfactor()
    iris.set_name(5, "C6")

    iris = iris.cbind(iris[4] == "Iris-virginica")
    iris[6] = iris[6].asfactor()
    iris.set_name(6, name="C7")

    # Pairwise interactions between the species column and the two new factors
    pair_frame = h2o.interaction(
        iris,
        factors=[4, 5, 6],
        pairwise=True,
        max_factors=10000,
        min_occurrence=1,
    )
    assert_is_type(pair_frame, H2OFrame)
    assert pair_frame.nrow == 150 and pair_frame.ncol == 3, \
        ("Expected 150 rows and 3 columns, but got {0} rows and {1} "
         "columns").format(pair_frame.nrow, pair_frame.ncol)

    # Read the levels of each column up front, one levels() call per column
    observed = [pair_frame.levels()[idx] for idx in range(3)]
    expected = [
        ["Iris-setosa_1", "Iris-versicolor_0", "Iris-virginica_0"],
        ["Iris-setosa_0", "Iris-versicolor_0", "Iris-virginica_1"],
        ["0_0", "1_0", "0_1"],
    ]
    levels_msg = "Expected the following levels {0}, but got {1}"
    for idx in range(3):
        assert observed[idx] == expected[idx], \
            levels_msg.format(expected[idx], observed[idx])