Code example #1
File: test_feature.py  Project: JingchengDu/spark
 def test_idf(self):
     dataset = self.spark.createDataFrame([
         (DenseVector([1.0, 2.0]),),
         (DenseVector([0.0, 1.0]),),
         (DenseVector([3.0, 0.2]),)], ["tf"])
     idf0 = IDF(inputCol="tf")
     self.assertListEqual(idf0.params, [idf0.inputCol, idf0.minDocFreq, idf0.outputCol])
     idf0m = idf0.fit(dataset, {idf0.outputCol: "idf"})
     self.assertEqual(idf0m.uid, idf0.uid,
                      "Model should inherit the UID from its parent estimator.")
     output = idf0m.transform(dataset)
     self.assertIsNotNone(output.head().idf)
     # Test that parameters transferred to Python Model
     check_params(self, idf0m)
Code example #2
File: test_feature.py  Project: vpatryshev/spark
 def test_idf(self):
     dataset = self.spark.createDataFrame([(DenseVector([1.0, 2.0]), ),
                                           (DenseVector([0.0, 1.0]), ),
                                           (DenseVector([3.0, 0.2]), )],
                                          ["tf"])
     idf0 = IDF(inputCol="tf")
     self.assertListEqual(idf0.params,
                          [idf0.inputCol, idf0.minDocFreq, idf0.outputCol])
     idf0m = idf0.fit(dataset, {idf0.outputCol: "idf"})
     self.assertEqual(
         idf0m.uid, idf0.uid,
         "Model should inherit the UID from its parent estimator.")
     output = idf0m.transform(dataset)
     self.assertIsNotNone(output.head().idf)
     # Test that parameters transferred to Python Model
     check_params(self, idf0m)
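The two test_idf excerpts above come from Spark's own test suite and rely on context defined elsewhere in test_feature.py: the imports for DenseVector and IDF, a SparkSession exposed as self.spark by the test base class, and the check_params helper from the test utilities. The following is a minimal standalone sketch of the same flow; the session setup and app name are assumptions, not part of the original tests, and check_params is not reproduced here.

from pyspark.sql import SparkSession
from pyspark.ml.linalg import DenseVector
from pyspark.ml.feature import IDF

# Assumed local session; the original tests obtain this from a shared fixture.
spark = SparkSession.builder.master("local[1]").appName("idf_sketch").getOrCreate()

dataset = spark.createDataFrame([
    (DenseVector([1.0, 2.0]),),
    (DenseVector([0.0, 1.0]),),
    (DenseVector([3.0, 0.2]),)], ["tf"])

idf = IDF(inputCol="tf")
# Passing a param map to fit() overrides outputCol for this fit only.
model = idf.fit(dataset, {idf.outputCol: "idf"})
assert model.uid == idf.uid  # the model inherits the UID of its parent estimator
assert model.transform(dataset).head().idf is not None

spark.stop()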
Code example #3
    def test_java_params(self):
        import pyspark.ml.feature
        import pyspark.ml.classification
        import pyspark.ml.clustering
        import pyspark.ml.evaluation
        import pyspark.ml.pipeline
        import pyspark.ml.recommendation
        import pyspark.ml.regression

        modules = [
            pyspark.ml.feature, pyspark.ml.classification,
            pyspark.ml.clustering, pyspark.ml.evaluation, pyspark.ml.pipeline,
            pyspark.ml.recommendation, pyspark.ml.regression
        ]
        for module in modules:
            for name, cls in inspect.getmembers(module, inspect.isclass):
                if not name.endswith('Model') and not name.endswith('Params') \
                        and issubclass(cls, JavaParams) and not inspect.isabstract(cls):
                    # NOTE: disable check_params_exist until there is parity with Scala API
                    check_params(self, cls(), check_params_exist=False)

        # Additional classes that need explicit construction
        from pyspark.ml.feature import CountVectorizerModel, StringIndexerModel
        check_params(self,
                     CountVectorizerModel.from_vocabulary(['a'], 'input'),
                     check_params_exist=False)
        check_params(self,
                     StringIndexerModel.from_labels(['a', 'b'], 'input'),
                     check_params_exist=False)
Code example #4
File: test_param.py  Project: Brett-A/spark
    def test_java_params(self):
        import pyspark.ml.feature
        import pyspark.ml.classification
        import pyspark.ml.clustering
        import pyspark.ml.evaluation
        import pyspark.ml.pipeline
        import pyspark.ml.recommendation
        import pyspark.ml.regression

        modules = [pyspark.ml.feature, pyspark.ml.classification, pyspark.ml.clustering,
                   pyspark.ml.evaluation, pyspark.ml.pipeline, pyspark.ml.recommendation,
                   pyspark.ml.regression]
        for module in modules:
            for name, cls in inspect.getmembers(module, inspect.isclass):
                if not name.endswith('Model') and not name.endswith('Params') \
                        and issubclass(cls, JavaParams) and not inspect.isabstract(cls):
                    # NOTE: disable check_params_exist until there is parity with Scala API
                    check_params(self, cls(), check_params_exist=False)

        # Additional classes that need explicit construction
        from pyspark.ml.feature import CountVectorizerModel, StringIndexerModel
        check_params(self, CountVectorizerModel.from_vocabulary(['a'], 'input'),
                     check_params_exist=False)
        check_params(self, StringIndexerModel.from_labels(['a', 'b'], 'input'),
                     check_params_exist=False)
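The "explicit construction" block at the end of the two examples above covers models that cannot be default-constructed but can be built directly from a vocabulary or label list, without fitting an estimator. Below is a hedged sketch of that usage; the SparkSession setup, column names, and sample data are illustrative assumptions only.

from pyspark.sql import SparkSession
from pyspark.ml.feature import CountVectorizerModel, StringIndexerModel

spark = SparkSession.builder.master("local[1]").appName("model_sketch").getOrCreate()

# Build the models directly instead of fitting CountVectorizer / StringIndexer.
cv_model = CountVectorizerModel.from_vocabulary(["a"], inputCol="tokens",
                                                outputCol="features")
si_model = StringIndexerModel.from_labels(["a", "b"], inputCol="label",
                                          outputCol="label_idx")

df = spark.createDataFrame([(["a", "a"], "b")], ["tokens", "label"])
cv_model.transform(df).show(truncate=False)
si_model.transform(df).show()

spark.stop()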
Code example #5
File: test_param.py  Project: zoelin7/spark
    def test_java_params(self):
        import re

        import pyspark.ml.feature
        import pyspark.ml.classification
        import pyspark.ml.clustering
        import pyspark.ml.evaluation
        import pyspark.ml.pipeline
        import pyspark.ml.recommendation
        import pyspark.ml.regression

        modules = [
            pyspark.ml.feature,
            pyspark.ml.classification,
            pyspark.ml.clustering,
            pyspark.ml.evaluation,
            pyspark.ml.pipeline,
            pyspark.ml.recommendation,
            pyspark.ml.regression,
        ]
        for module in modules:
            for name, cls in inspect.getmembers(module, inspect.isclass):
                if (not name.endswith("Model") and not name.endswith("Params")
                        and issubclass(cls, JavaParams)
                        and not inspect.isabstract(cls)
                        and not re.match("_?Java", name) and name != "_LSH"
                        and name != "_Selector"):
                    check_params(self, cls(), check_params_exist=True)

        # Additional classes that need explicit construction
        from pyspark.ml.feature import CountVectorizerModel, StringIndexerModel

        check_params(self,
                     CountVectorizerModel.from_vocabulary(["a"], "input"),
                     check_params_exist=True)
        check_params(self,
                     StringIndexerModel.from_labels(["a", "b"], "input"),
                     check_params_exist=True)
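All three test_java_params examples assume module-level imports in test_param.py (notably inspect, JavaParams, and the check_params helper), which is why the excerpts can use them without importing. The sketch below isolates the class-discovery step for a single module so the filtering logic is visible on its own; it only prints the class names that pass the filter and does not call check_params, which lives in Spark's test utilities.

import inspect
import re

import pyspark.ml.feature
from pyspark.ml.wrapper import JavaParams

for name, cls in inspect.getmembers(pyspark.ml.feature, inspect.isclass):
    # Mirror the filter above: skip fitted models, param mixins, abstract bases,
    # and private Java wrapper classes; what remains can be built with cls().
    if (not name.endswith("Model") and not name.endswith("Params")
            and issubclass(cls, JavaParams) and not inspect.isabstract(cls)
            and not re.match("_?Java", name)):
        print(name)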