Example #1
def test_linear_regression(self):
    # Round-trip an unfit LinearRegression estimator through save/load.
    lr = LinearRegression(maxIter=1)
    path = tempfile.mkdtemp()
    lr_path = path + "/lr"
    lr.save(lr_path)
    lr2 = LinearRegression.load(lr_path)
    # The loaded instance must keep the original uid and its default params.
    self.assertEqual(lr2.uid, lr2.maxIter.parent,
                     "Loaded LinearRegression instance uid (%s) did not match Param's uid (%s)"
                     % (lr2.uid, lr2.maxIter.parent))
    self.assertEqual(lr._defaultParamMap[lr.maxIter], lr2._defaultParamMap[lr2.maxIter],
                     "Loaded LinearRegression instance default params did not match " +
                     "original defaults")
    # Clean up the temp directory; ignore failures on teardown.
    try:
        rmtree(path)
    except OSError:
        pass
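The method above appears to come from PySpark's own persistence test suite, so it assumes a surrounding unittest.TestCase and an active SparkSession. A minimal standalone sketch of the same round-trip check, where the SparkSession setup and the plain asserts are additions for illustration:

import tempfile
from shutil import rmtree

from pyspark.sql import SparkSession
from pyspark.ml.regression import LinearRegression

# ML save/load goes through the JVM, so a SparkContext must be running.
spark = SparkSession.builder.appName("lr-persistence-check").getOrCreate()

lr = LinearRegression(maxIter=1)
path = tempfile.mkdtemp()
lr.save(path + "/lr")
lr2 = LinearRegression.load(path + "/lr")

# Same invariants the test asserts: uid and default params survive the round trip.
assert lr2.uid == lr2.maxIter.parent
assert lr._defaultParamMap[lr.maxIter] == lr2._defaultParamMap[lr2.maxIter]

rmtree(path, ignore_errors=True)
spark.stop()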
Example #2
import tempfile
from shutil import rmtree

from pyspark.sql import SparkSession
from pyspark.ml.linalg import Vectors
from pyspark.ml.regression import LinearRegression, LinearRegressionModel


def linear_regression():
    spark = SparkSession \
        .builder \
        .appName("Python Spark SQL basic example") \
        .config("spark.some.config.option", "some-value") \
        .getOrCreate()
    df = spark.createDataFrame([(1.0, 2.0, Vectors.dense(1.0)),
                                (0.0, 2.0, Vectors.sparse(1, [], []))],
                               ["label", "weight", "features"])
    lr = LinearRegression(maxIter=5,
                          regParam=0.0,
                          solver="normal",
                          weightCol="weight")
    model = lr.fit(df)
    # The weighted least-squares fit should recover y = x almost exactly.
    test0 = spark.createDataFrame([(Vectors.dense(-1.0),)], ["features"])
    assert abs(model.transform(test0).head().prediction - (-1.0)) < 0.001
    assert abs(model.coefficients[0] - 1.0) < 0.001
    assert abs(model.intercept - 0.0) < 0.001
    test1 = spark.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)],
                                  ["features"])
    assert abs(model.transform(test1).head().prediction - 1.0) < 0.001
    # setParams rejects positional arguments:
    # TypeError: Method setParams forces keyword arguments.
    try:
        lr.setParams("vector")
    except TypeError:
        pass
    # Round-trip both the estimator and the fitted model through save/load.
    temp_path = tempfile.mkdtemp()
    lr_path = temp_path + "/lr"
    lr.save(lr_path)
    lr2 = LinearRegression.load(lr_path)
    assert lr2.getMaxIter() == 5
    model_path = temp_path + "/lr_model"
    model.save(model_path)
    model2 = LinearRegressionModel.load(model_path)
    assert model.coefficients[0] == model2.coefficients[0]
    assert model.intercept == model2.intercept
    print(model.numFeatures)  # 1
    rmtree(temp_path, ignore_errors=True)
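The fitted model also carries a training summary with goodness-of-fit diagnostics. A brief sketch of inspecting it, assuming `model` is the fitted LinearRegressionModel from the function above:

summary = model.summary
print(summary.rootMeanSquaredError)  # training RMSE
print(summary.r2)                    # coefficient of determination
print(summary.totalIterations)       # iterations the solver actually ran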
Example #3
train_data, test_data = scaled_df.randomSplit([.8, .2], seed=1234)

# Import `LinearRegression` and `LinearRegressionModel`
from pyspark.ml.regression import LinearRegression, LinearRegressionModel

# Initialize `lr`
lr = LinearRegression(labelCol="label", maxIter=10, regParam=0.3, elasticNetParam=0.8)

# Fit the model to the training data
linearModel = lr.fit(train_data)
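
# Inspect the fitted coefficients and intercept
# (illustrative addition; these are standard LinearRegressionModel attributes)
print("Coefficients: " + str(linearModel.coefficients))
print("Intercept: " + str(linearModel.intercept))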

# Save the fitted model
linearModel.save('my_cal_house_lin_model')  # Ensure correct path here

# Load the model back; fitted models reload via `LinearRegressionModel`, not the estimator class
loadedModel = LinearRegressionModel.load('my_cal_house_lin_model')   # Ensure correct path here


# Generate predictions
predicted = linearModel.transform(test_data)

# Extract the predictions and the "known" correct labels
predictions = predicted.select("prediction").rdd.map(lambda x: x[0])
labels = predicted.select("label").rdd.map(lambda x: x[0])

# Zip `predictions` and `labels` into a list
predictionAndLabel = predictions.zip(labels).collect()

# Print out the first 5 instances of `predictionAndLabel`
print(predictionAndLabel[:5])
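Beyond eyeballing the first few pairs, the predictions can be scored with Spark's built-in RegressionEvaluator. A short sketch, reusing the `predicted` DataFrame from above (the metric choices here are illustrative):

from pyspark.ml.evaluation import RegressionEvaluator

# Compute RMSE and R^2 on the `predicted` DataFrame from above
evaluator = RegressionEvaluator(labelCol="label", predictionCol="prediction",
                                metricName="rmse")
rmse = evaluator.evaluate(predicted)
r2 = evaluator.evaluate(predicted, {evaluator.metricName: "r2"})
print("RMSE: %.4f  R2: %.4f" % (rmse, r2))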