Ejemplo n.º 1
0
def check_params(test_self, py_stage, check_params_exist=True):
    """
    Checks common requirements for ``Params.params``:

      - set of params exist in Java and Python and are ordered by names
      - param parent has the same UID as the object's UID
      - default param value from Java matches value in Python
      - optionally check if all params from Java also exist in Python

    Stages that have no ``_to_java`` method, or whose ``_to_java()`` returns
    ``None``, are skipped without running any assertion.

    :param test_self: test case object; its ``assertEqual`` method is used for
        every check and its ``sc`` attribute is passed to ``_java2py``.
    :param py_stage: Python stage to compare with the object returned by its
        ``_to_java()``. As a side effect, every param that has a default
        (except ``seed``) is cleared on both the Java and the Python object.
    :param check_params_exist: when True, additionally require the Python
        param names to equal the sorted Java param names.
    """
    py_stage_str = "%s %s" % (type(py_stage), py_stage)
    if not hasattr(py_stage, "_to_java"):
        return
    java_stage = py_stage._to_java()
    if java_stage is None:
        return
    test_self.assertEqual(py_stage.uid, java_stage.uid(), msg=py_stage_str)
    if check_params_exist:
        param_names = [p.name for p in py_stage.params]
        java_params = list(java_stage.params())
        java_param_names = [jp.name() for jp in java_params]
        test_self.assertEqual(
            param_names,
            sorted(java_param_names),
            "Param list in Python does not match Java for %s:\nJava = %s\nPython = %s"
            % (py_stage_str, java_param_names, param_names),
        )
    for p in py_stage.params:
        test_self.assertEqual(
            p.parent,
            py_stage.uid,
            "Parent of param %s does not match the UID of Params %s"
            % (p.name, str(py_stage)),
        )
        java_param = java_stage.getParam(p.name)
        py_has_default = py_stage.hasDefault(p)
        java_has_default = java_stage.hasDefault(java_param)
        test_self.assertEqual(
            py_has_default,
            java_has_default,
            "Default value mismatch of param %s for Params %s" %
            (p.name, str(py_stage)),
        )
        if py_has_default:
            if p.name == "seed":
                continue  # Random seeds between Spark and PySpark are different
            java_default = _java2py(
                test_self.sc,
                java_stage.clear(java_param).getOrDefault(java_param))
            py_stage.clear(p)
            py_default = py_stage.getOrDefault(p)
            # equality test for NaN is always False
            if isinstance(java_default, float) and np.isnan(java_default):
                java_default = "NaN"
                try:
                    py_is_nan = bool(np.isnan(py_default))
                except (TypeError, ValueError):
                    # np.isnan rejects non-numeric values (and bool() rejects
                    # multi-element arrays); such a default is simply not NaN,
                    # so report a regular assertion failure instead of crashing.
                    py_is_nan = False
                py_default = "NaN" if py_is_nan else "not NaN"
            test_self.assertEqual(
                java_default,
                py_default,
                "Java default %s != python default %s of param %s for Params %s"
                % (str(java_default), str(py_default), p.name, str(py_stage)),
            )
Ejemplo n.º 2
0
 def test_new_java_array(self):
     # Every case hands a Python list and a JVM class to
     # JavaWrapper._new_java_array, converts the result back with _java2py
     # and compares it with the expected Python list.
     jvm = self.sc._gateway.jvm

     def assert_round_trip(elements, element_class, expected):
         converted = JavaWrapper._new_java_array(elements, element_class)
         self.assertEqual(_java2py(self.sc, converted), expected)

     # strings
     strings = ["a", "b", "c"]
     assert_round_trip(strings, jvm.java.lang.String, strings)

     # integers
     integers = [1, 2, 3]
     assert_round_trip(integers, jvm.java.lang.Integer, integers)

     # floats
     floats = [0.1, 0.2, 0.3]
     assert_round_trip(floats, jvm.java.lang.Double, floats)

     # booleans
     booleans = [False, True, True]
     assert_round_trip(booleans, jvm.java.lang.Boolean, booleans)

     # Java DenseVectors: the inputs go through _py2java first, the result
     # is compared with the original Python vectors
     first_vec = DenseVector([0.0, 1.0])
     second_vec = DenseVector([1.0, 0.0])
     assert_round_trip(
         [_py2java(self.sc, first_vec), _py2java(self.sc, second_vec)],
         jvm.org.apache.spark.ml.linalg.DenseVector,
         [first_vec, second_vec],
     )

     # empty array
     assert_round_trip([], jvm.java.lang.Integer, [])
Ejemplo n.º 3
0
 def test_new_java_array(self):
     # Cases are produced lazily so that each one is only built after the
     # previous assertion has passed. Every case is a triple of
     # (elements given to JavaWrapper._new_java_array, JVM element class,
     # list expected back from _java2py).
     def array_cases():
         jvm = self.sc._gateway.jvm
         # array of strings
         letters = ["a", "b", "c"]
         yield letters, jvm.java.lang.String, letters
         # array of integers
         numbers = [1, 2, 3]
         yield numbers, jvm.java.lang.Integer, numbers
         # array of floats
         fractions = [0.1, 0.2, 0.3]
         yield fractions, jvm.java.lang.Double, fractions
         # array of bools
         flags = [False, True, True]
         yield flags, jvm.java.lang.Boolean, flags
         # array of Java DenseVectors, compared with the Python originals
         vectors = [DenseVector([0.0, 1.0]), DenseVector([1.0, 0.0])]
         yield (
             [_py2java(self.sc, vec) for vec in vectors],
             jvm.org.apache.spark.ml.linalg.DenseVector,
             vectors,
         )
         # empty array
         yield [], jvm.java.lang.Integer, []

     for py_elements, element_class, expected in array_cases():
         java_array = JavaWrapper._new_java_array(py_elements, element_class)
         self.assertEqual(_java2py(self.sc, java_array), expected)
Ejemplo n.º 4
0
def check_params(test_self, py_stage, check_params_exist=True):
    """
    Checks common requirements for Params.params:
      - set of params exist in Java and Python and are ordered by names
      - param parent has the same UID as the object's UID
      - default param value from Java matches value in Python
      - optionally check if all params from Java also exist in Python

    Nothing is checked when ``py_stage`` has no ``_to_java`` method or when
    ``_to_java()`` returns ``None``.

    :param test_self: test case object; its ``assertEqual`` method performs
        the checks and its ``sc`` attribute is passed to ``_java2py``.
    :param py_stage: Python stage compared with the object returned by its
        ``_to_java()``. Params that have a default (other than ``seed``) are
        cleared on both the Java and the Python object as a side effect.
    :param check_params_exist: when True, also require the Python param names
        to equal the sorted Java param names.
    """
    py_stage_str = "%s %s" % (type(py_stage), py_stage)
    if not hasattr(py_stage, "_to_java"):
        return
    java_stage = py_stage._to_java()
    if java_stage is None:
        return
    test_self.assertEqual(py_stage.uid, java_stage.uid(), msg=py_stage_str)
    if check_params_exist:
        param_names = [p.name for p in py_stage.params]
        java_params = list(java_stage.params())
        java_param_names = [jp.name() for jp in java_params]
        test_self.assertEqual(
            param_names, sorted(java_param_names),
            "Param list in Python does not match Java for %s:\nJava = %s\nPython = %s"
            % (py_stage_str, java_param_names, param_names))
    for p in py_stage.params:
        test_self.assertEqual(p.parent, py_stage.uid,
                              "Parent of param %s does not match the UID of Params %s"
                              % (p.name, str(py_stage)))
        java_param = java_stage.getParam(p.name)
        py_has_default = py_stage.hasDefault(p)
        java_has_default = java_stage.hasDefault(java_param)
        test_self.assertEqual(py_has_default, java_has_default,
                              "Default value mismatch of param %s for Params %s"
                              % (p.name, str(py_stage)))
        if py_has_default:
            if p.name == "seed":
                continue  # Random seeds between Spark and PySpark are different
            java_default = _java2py(test_self.sc,
                                    java_stage.clear(java_param).getOrDefault(java_param))
            py_stage._clear(p)
            py_default = py_stage.getOrDefault(p)
            # equality test for NaN is always False
            if isinstance(java_default, float) and np.isnan(java_default):
                java_default = "NaN"
                try:
                    py_is_nan = bool(np.isnan(py_default))
                except (TypeError, ValueError):
                    # A non-numeric (or multi-element array) Python default
                    # cannot be NaN; let the assertion below report the
                    # mismatch instead of failing with an unrelated error.
                    py_is_nan = False
                py_default = "NaN" if py_is_nan else "not NaN"
            test_self.assertEqual(
                java_default, py_default,
                "Java default %s != python default %s of param %s for Params %s"
                % (str(java_default), str(py_default), p.name, str(py_stage)))