예제 #1
0
 def test_decisiontree_regressor(self):
     """Round-trip a DecisionTreeRegressor through save()/load() and
     verify that the loaded estimator's param uid and default params
     match the original."""
     dt = DecisionTreeRegressor(maxDepth=1)
     path = tempfile.mkdtemp()
     dtr_path = path + "/dtr"
     dt.save(dtr_path)
     # FIX: load with DecisionTreeRegressor — the class that was saved.
     # The original code loaded with DecisionTreeClassifier, which does not
     # match the persisted estimator type.
     dt2 = DecisionTreeRegressor.load(dtr_path)
     self.assertEqual(dt2.uid, dt2.maxDepth.parent,
                      "Loaded DecisionTreeRegressor instance uid (%s) "
                      "did not match Param's uid (%s)"
                      % (dt2.uid, dt2.maxDepth.parent))
     self.assertEqual(dt._defaultParamMap[dt.maxDepth], dt2._defaultParamMap[dt2.maxDepth],
                      "Loaded DecisionTreeRegressor instance default params did not match " +
                      "original defaults")
     try:
         rmtree(path)
     except OSError:
         # Best-effort cleanup of the temp directory; ignore failures.
         pass
예제 #2
0
파일: tests.py 프로젝트: Bella-Lin/spark
 def test_decisiontree_regressor(self):
     """Round-trip a DecisionTreeRegressor through save()/load() and
     verify that the loaded estimator's param uid and default params
     match the original."""
     dt = DecisionTreeRegressor(maxDepth=1)
     path = tempfile.mkdtemp()
     dtr_path = path + "/dtr"
     dt.save(dtr_path)
     # FIX: load with DecisionTreeRegressor — the class that was saved.
     # The original code loaded with DecisionTreeClassifier, which does not
     # match the persisted estimator type.
     dt2 = DecisionTreeRegressor.load(dtr_path)
     self.assertEqual(dt2.uid, dt2.maxDepth.parent,
                      "Loaded DecisionTreeRegressor instance uid (%s) "
                      "did not match Param's uid (%s)"
                      % (dt2.uid, dt2.maxDepth.parent))
     self.assertEqual(dt._defaultParamMap[dt.maxDepth], dt2._defaultParamMap[dt2.maxDepth],
                      "Loaded DecisionTreeRegressor instance default params did not match " +
                      "original defaults")
     try:
         rmtree(path)
     except OSError:
         # Best-effort cleanup of the temp directory; ignore failures.
         pass
예제 #3
0
def decision_tree_regressor():
    """Fit a DecisionTreeRegressor on a tiny two-row DataFrame, inspect
    the fitted model, and round-trip both the estimator and the fitted
    model through save()/load().

    Expected values from a run appear as inline comments after each
    expression.
    """
    # Get or create a local SparkSession (entry point for the DataFrame API).
    spark = SparkSession \
        .builder \
        .appName("Python Spark SQL basic example") \
        .config("spark.some.config.option", "some-value") \
        .getOrCreate()
    # Two labeled points: label 1.0 with dense feature [1.0],
    # label 0.0 with an all-zero sparse feature vector.
    df = spark.createDataFrame([(1.0, Vectors.dense(1.0)),
                                (0.0, Vectors.sparse(1, [], []))],
                               ["label", "features"])
    # maxDepth=2 caps tree depth; varianceCol adds a per-prediction
    # variance column to transform() output.
    dt = DecisionTreeRegressor(maxDepth=2, varianceCol="variance")
    model = dt.fit(df)
    model.depth
    # 1
    model.numNodes
    # 3
    model.featureImportances
    # SparseVector(1, {0: 1.0})
    model.numFeatures
    # 1
    test0 = spark.createDataFrame([(Vectors.dense(-1.0), )], ["features"])
    model.transform(test0).head().prediction
    # 0.0
    test1 = spark.createDataFrame([(Vectors.sparse(1, [0], [1.0]), )],
                                  ["features"])
    model.transform(test1).head().prediction
    # 1.0
    # NOTE(review): "./" + "/dtr" yields ".//dtr" — harmless on POSIX-style
    # paths, but a tempfile.mkdtemp() directory would be cleaner and would
    # avoid littering the working directory.
    temp_path = "./"
    dtr_path = temp_path + "/dtr"
    dt.save(dtr_path)
    # Round-trip the (unfitted) estimator and confirm its params persisted.
    dt2 = DecisionTreeRegressor.load(dtr_path)
    dt2.getMaxDepth()
    # 2
    model_path = temp_path + "/dtr_model"
    model.save(model_path)
    # Round-trip the fitted model and confirm its structure persisted.
    model2 = DecisionTreeRegressionModel.load(model_path)
    model.numNodes == model2.numNodes
    # True
    model.depth == model2.depth
    # True
    # The variance column exists because varianceCol was set on the estimator.
    model.transform(test1).head().variance