예제 #1
0
    def main(self, sc, *args):
        """Fit a Poisson generalized linear model and save it to the output path.

        The training points are obtained from ``self.requires().get_points_rdd(sc)``,
        converted to a DataFrame, and used to fit a
        ``GeneralizedLinearRegression`` with ``family='poisson'``. The link
        function and iteration cap are read from ``self.link`` and
        ``self.iterations``. The fitted model is saved to ``self.output().path``.

        Args:
            sc: Passed through to ``get_points_rdd``; presumably the active
                SparkContext -- confirm against the caller.
            *args: Accepted but not used by this method.
        """
        source_rdd = self.requires().get_points_rdd(sc)

        # Keep the unfitted estimator and the fitted model under separate names.
        estimator = GeneralizedLinearRegression(
            family='poisson',
            link=self.link,
            maxIter=self.iterations,
        )

        session = SparkSession.builder.getOrCreate()
        training_frame = session.createDataFrame(source_rdd)

        fitted_model = estimator.fit(training_frame)
        fitted_model.save(self.output().path)
예제 #2
0
def generalized_linear_regression():
    """Fit, inspect and save/reload a Gaussian GLM on a four-row toy dataset.

    Builds (or reuses) a SparkSession, fits a ``GeneralizedLinearRegression``
    with ``family="gaussian"`` and ``link="identity"``, then saves and reloads
    both the estimator and the fitted model under the current directory.

    Returns:
        dict: The outcome of every check the example performs, keyed by name.
        The boolean entries are expected to be ``True`` and ``num_features``
        is expected to be ``2``.
    """
    spark = (
        SparkSession.builder
        .appName("Python Spark SQL basic example")
        .config("spark.some.config.option", "some-value")
        .getOrCreate()
    )
    df = spark.createDataFrame([
        (1.0, Vectors.dense(0.0, 0.0)),
        (1.0, Vectors.dense(1.0, 2.0)),
        (2.0, Vectors.dense(0.0, 0.0)),
        (2.0, Vectors.dense(1.0, 1.0)),
    ], ["label", "features"])

    # linkPredictionCol must be set: the "p" column read below only exists
    # when the estimator is asked to emit the link-scale prediction.
    glr = GeneralizedLinearRegression(
        family="gaussian",
        link="identity",
        linkPredictionCol="p",
    )
    model = glr.fit(df)

    # Fetch the first row once; each head() call triggers a Spark job.
    first_row = model.transform(df).head()

    results = {
        "prediction_ok": abs(first_row.prediction - 1.5) < 0.001,
        "link_prediction_ok": abs(first_row.p - 1.5) < 0.001,
        "coefficients": model.coefficients,
        "num_features": model.numFeatures,
        "intercept_ok": abs(model.intercept - 1.5) < 0.001,
    }

    temp_path = "./"

    # Overwrite so the example can be re-run; a plain save() fails when the
    # target path already exists from a previous run.
    glr_path = temp_path + "/glr"
    glr.write().overwrite().save(glr_path)
    glr2 = GeneralizedLinearRegression.load(glr_path)
    results["estimator_family_roundtrip"] = glr.getFamily() == glr2.getFamily()

    model_path = temp_path + "/glr_model"
    model.write().overwrite().save(model_path)
    model2 = GeneralizedLinearRegressionModel.load(model_path)
    results["model_intercept_roundtrip"] = model.intercept == model2.intercept
    results["model_coefficient_roundtrip"] = (
        model.coefficients[0] == model2.coefficients[0]
    )

    return results