def interaction_check():
    """Check pairwise h2o.interaction() output on the iris data set.

    Two factor columns are appended to the iris frame (indicators for
    column 4 being "Iris-setosa" and "Iris-virginica"), and the pairwise
    interaction frame is then built twice: once with ``factors`` given as
    column indices and once as column names. Each run must yield 150 rows,
    3 columns and the expected level lists.
    """
    frame = h2o.import_file(path=pyunit_utils.locate("smalldata/iris/iris.csv"))

    # Indicator for "Iris-setosa", stored as factor column 5 and named "C6".
    frame = frame.cbind(frame[4] == "Iris-setosa")
    frame[5] = frame[5].asfactor()
    frame.set_name(5, "C6")

    # Indicator for "Iris-virginica", stored as factor column 6 and named "C7".
    frame = frame.cbind(frame[4] == "Iris-virginica")
    frame[6] = frame[6].asfactor()
    frame.set_name(6, name="C7")

    # Expected levels for interaction columns 0, 1 and 2, in that order.
    expected_levels = (
        ["Iris-setosa_1", "Iris-versicolor_0", "Iris-virginica_0"],
        ["Iris-setosa_0", "Iris-versicolor_0", "Iris-virginica_1"],
        ["0_0", "1_0", "0_1"],
    )

    # First pass addresses the factor columns by index, second pass by name.
    # NOTE(review): "C5" is presumably the default name of column 4 - confirm.
    for factor_spec in ([4, 5, 6], ["C5", "C6", "C7"]):
        pairs = h2o.interaction(iris_frame := None, factors=factor_spec, pairwise=True,
                                max_factors=10000, min_occurrence=1) if False else \
            h2o.interaction(frame, factors=factor_spec, pairwise=True,
                            max_factors=10000, min_occurrence=1)

        assert pairs.nrow == 150 and pairs.ncol == 3, \
            "Expected 150 rows and 3 columns, but got {0} rows and {1} columns".format(pairs.nrow, pairs.ncol)

        # Fetch all three level lists before asserting on any of them.
        observed_levels = [pairs[col].levels() for col in range(3)]

        for wanted, found in zip(expected_levels, observed_levels):
            assert found == wanted, \
                "Expected the following levels {0}, but got {1}".format(wanted, found)
def interaction_check():
    """Verify pairwise h2o.interaction() results on iris, by index and by name.

    The iris frame gains two factor columns (whether column 4 equals
    "Iris-setosa", and whether it equals "Iris-virginica"). The pairwise
    interaction frame is then requested with ``factors`` as indices and
    again as column names, and both results are held to the same checks.

    NOTE(review): this block uses ``h2o.locate`` and ``setName`` while other
    tests in this file use ``pyunit_utils.locate`` and ``set_name`` - confirm
    which spelling the targeted h2o version provides.
    """

    def verify(result):
        # Shape first, then the level list of each interaction column.
        assert result.nrow == 150 and result.ncol == 3, \
            "Expected 150 rows and 3 columns, but got {0} rows and {1} columns".format(result.nrow, result.ncol)

        first = result[0].levels()
        second = result[1].levels()
        third = result[2].levels()

        want_first = ["Iris-setosa_1", "Iris-versicolor_0", "Iris-virginica_0"]
        assert first == want_first, \
            "Expected the following levels {0}, but got {1}".format(want_first, first)

        want_second = ["Iris-setosa_0", "Iris-versicolor_0", "Iris-virginica_1"]
        assert second == want_second, \
            "Expected the following levels {0}, but got {1}".format(want_second, second)

        want_third = ["0_0", "1_0", "0_1"]
        assert third == want_third, \
            "Expected the following levels {0}, but got {1}".format(want_third, third)

    data = h2o.import_file(path=h2o.locate("smalldata/iris/iris.csv"))

    # Append the "Iris-setosa" indicator as factor column 5, named "C6".
    data = data.cbind(data[4] == "Iris-setosa")
    data[5] = data[5].asfactor()
    data.setName(5, "C6")

    # Append the "Iris-virginica" indicator as factor column 6, named "C7".
    data = data.cbind(data[4] == "Iris-virginica")
    data[6] = data[6].asfactor()
    data.setName(6, name="C7")

    # Run 1: factor columns selected by position.
    by_index = h2o.interaction(data, factors=[4, 5, 6], pairwise=True,
                               max_factors=10000, min_occurrence=1)
    verify(by_index)

    # Run 2: the same columns selected by name.
    by_name = h2o.interaction(data, factors=["C5", "C6", "C7"], pairwise=True,
                              max_factors=10000, min_occurrence=1)
    verify(by_name)
def interaction(self, factors, pairwise, max_factors, min_occurrence, destination_frame=None):
    """
    Categorical Interaction Feature Creation in H2O.

    Creates a frame in H2O with n-th order interaction features between
    categorical columns, as specified by the user. This method forwards its
    arguments to ``h2o.interaction`` with this frame passed as ``data``.

    :param factors: Factor columns (either indices or column names).
    :param pairwise: Whether to create pairwise interactions between factors
        (otherwise create one higher-order interaction). Only applicable if
        there are 3 or more factors.
    :param max_factors: Max. number of factor levels in pair-wise interaction
        terms (if enforced, one extra catch-all factor will be made).
    :param min_occurrence: Min. occurrence threshold for factor levels in
        pair-wise interaction terms.
    :param destination_frame: A string indicating the destination key. If
        empty, this will be auto-generated by H2O.
    :return: H2OFrame
    """
    # Collect the pass-through options, then delegate to the module-level function.
    options = dict(
        factors=factors,
        pairwise=pairwise,
        max_factors=max_factors,
        min_occurrence=min_occurrence,
        destination_frame=destination_frame,
    )
    return h2o.interaction(data=self, **options)
def h2ointeraction():
    """
    Python API test: h2o.interaction(data, factors, pairwise, max_factors, min_occurrence, destination_frame=None)

    Copied from pyunit_interaction.py

    Appends two factor columns to the iris frame, builds the pairwise
    interaction frame for columns 4, 5 and 6, and checks its type, its shape
    (150 rows, 3 columns) and the levels of each interaction column.

    Failures are deliberately not caught here: an assertion below, or an
    error raised by an h2o call, propagates unchanged so the test report
    shows the real cause instead of a generic message.
    """
    iris = h2o.import_file(path=pyunit_utils.locate("smalldata/iris/iris.csv"))

    # add a couple of factor columns to iris
    iris = iris.cbind(iris[4] == "Iris-setosa")
    iris[5] = iris[5].asfactor()
    iris.set_name(5, "C6")

    iris = iris.cbind(iris[4] == "Iris-virginica")
    iris[6] = iris[6].asfactor()
    iris.set_name(6, name="C7")

    # create a frame of the two-way interactions
    two_way_interactions = h2o.interaction(iris, factors=[4, 5, 6], pairwise=True,
                                           max_factors=10000, min_occurrence=1)
    assert_is_type(two_way_interactions, H2OFrame)
    assert two_way_interactions.nrow == 150 and two_way_interactions.ncol == 3, \
        "Expected 150 rows and 3 columns, but got {0} rows and {1} " \
        "columns".format(two_way_interactions.nrow, two_way_interactions.ncol)

    # Ask for the levels once and index into the result per column.
    all_levels = two_way_interactions.levels()
    expected_levels = [
        ["Iris-setosa_1", "Iris-versicolor_0", "Iris-virginica_0"],
        ["Iris-setosa_0", "Iris-versicolor_0", "Iris-virginica_1"],
        ["0_0", "1_0", "0_1"],
    ]
    for column, expected in enumerate(expected_levels):
        actual = all_levels[column]
        assert actual == expected, \
            "Expected the following levels {0}, but got {1}".format(expected, actual)
def h2ointeraction():
    """
    Python API test: h2o.interaction(data, factors, pairwise, max_factors, min_occurrence, destination_frame=None)

    Copied from pyunit_interaction.py

    Adds two factor columns to the iris frame, requests the pairwise
    interactions of columns 4, 5 and 6, and asserts on the type, shape and
    per-column levels of the returned frame.
    """
    source = h2o.import_file(path=pyunit_utils.locate("smalldata/iris/iris.csv"))

    # Factor column 5 ("C6"): whether column 4 equals "Iris-setosa".
    source = source.cbind(source[4] == "Iris-setosa")
    source[5] = source[5].asfactor()
    source.set_name(5, "C6")

    # Factor column 6 ("C7"): whether column 4 equals "Iris-virginica".
    source = source.cbind(source[4] == "Iris-virginica")
    source[6] = source[6].asfactor()
    source.set_name(6, name="C7")

    # Build the frame of two-way interactions.
    pairs = h2o.interaction(source, factors=[4, 5, 6], pairwise=True,
                            max_factors=10000, min_occurrence=1)
    assert_is_type(pairs, H2OFrame)

    assert pairs.nrow == 150 and pairs.ncol == 3, \
        "Expected 150 rows and 3 columns, but got {0} rows and {1} columns".format(pairs.nrow, pairs.ncol)

    # One levels() request per column, all made before any comparison.
    found_levels = [pairs.levels()[position] for position in range(3)]

    wanted_levels = (
        ["Iris-setosa_1", "Iris-versicolor_0", "Iris-virginica_0"],
        ["Iris-setosa_0", "Iris-versicolor_0", "Iris-virginica_1"],
        ["0_0", "1_0", "0_1"],
    )
    for wanted, found in zip(wanted_levels, found_levels):
        assert found == wanted, \
            "Expected the following levels {0}, but got {1}".format(wanted, found)