def test_linear_regression(self):
    """Check that a saved ``LinearRegression`` estimator survives a save/load round trip.

    The estimator is written into a fresh temporary directory, loaded back,
    and two properties of the reloaded instance are asserted:

    * its ``uid`` equals the ``parent`` recorded on its own ``maxIter`` param;
    * its default value for ``maxIter`` equals the original instance's default.

    The temporary directory is removed whether or not saving, loading, or
    either assertion fails.
    """
    lr = LinearRegression(maxIter=1)
    path = tempfile.mkdtemp()
    try:
        lr_path = path + "/lr"
        lr.save(lr_path)
        lr2 = LinearRegression.load(lr_path)
        self.assertEqual(lr2.uid, lr2.maxIter.parent,
                         "Loaded LinearRegression instance uid (%s) did not match Param's uid (%s)"
                         % (lr2.uid, lr2.maxIter.parent))
        self.assertEqual(lr._defaultParamMap[lr.maxIter], lr2._defaultParamMap[lr2.maxIter],
                         "Loaded LinearRegression instance default params did not match " +
                         "original defaults")
    finally:
        # Cleanup lives in `finally` so a failed assertion (or a failed
        # save/load) does not leave the temporary directory behind.
        # Removal itself stays best-effort: a cleanup error must not mask
        # the real test outcome.
        try:
            rmtree(path)
        except OSError:
            pass
def linear_regression():
    """Fit, sanity-check, and persist a weighted ``LinearRegression`` model.

    The walkthrough:

    1. obtains a ``SparkSession`` via ``getOrCreate()``;
    2. fits a ``LinearRegression`` (normal solver, no regularisation,
       ``weightCol="weight"``) on a two-row DataFrame;
    3. checks the prediction for two single-feature inputs plus the fitted
       coefficient and intercept, each to within ``0.001``;
    4. confirms that ``setParams`` rejects a positional argument;
    5. saves the estimator and the fitted model under ``"./"`` (as ``lr`` and
       ``lr_model``), loads both back, and compares them with the originals.

    Returns:
        None.

    Raises:
        AssertionError: if any of the checks above does not hold.
    """
    spark = (
        SparkSession.builder
        .appName("Python Spark SQL basic example")
        .config("spark.some.config.option", "some-value")
        .getOrCreate()
    )
    df = spark.createDataFrame(
        [
            (1.0, 2.0, Vectors.dense(1.0)),
            (0.0, 2.0, Vectors.sparse(1, [], [])),
        ],
        ["label", "weight", "features"],
    )
    lr = LinearRegression(maxIter=5, regParam=0.0, solver="normal", weightCol="weight")
    model = lr.fit(df)

    # The comparisons below used to be bare expressions whose results were
    # thrown away; asserting them makes the expectations actually checked.
    test0 = spark.createDataFrame([(Vectors.dense(-1.0),)], ["features"])
    assert abs(model.transform(test0).head().prediction - (-1.0)) < 0.001
    assert abs(model.coefficients[0] - 1.0) < 0.001
    assert abs(model.intercept - 0.0) < 0.001
    test1 = spark.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"])
    assert abs(model.transform(test1).head().prediction - 1.0) < 0.001

    # `setParams` is keyword-only, so a positional argument is expected to
    # raise "TypeError: Method setParams forces keyword arguments.".
    # Left unguarded, that error aborted the function before the
    # persistence steps below could run.
    try:
        lr.setParams("vector")
    except TypeError:
        pass
    else:
        raise AssertionError("setParams unexpectedly accepted a positional argument")

    # NOTE(review): the targets live in the current working directory, so a
    # second run will presumably find them already present - confirm how
    # `save` behaves for an existing path before relying on re-runs.
    temp_path = "./"
    lr_path = temp_path + "/lr"
    lr.save(lr_path)
    lr2 = LinearRegression.load(lr_path)
    assert lr2.getMaxIter() == 5

    model_path = temp_path + "/lr_model"
    model.save(model_path)
    model2 = LinearRegressionModel.load(model_path)
    assert model.coefficients[0] == model2.coefficients[0]
    assert model.intercept == model2.intercept

    # Kept from the original snippet; the value is read but not used.
    model.numFeatures
# Import `LinearRegression` (the estimator) and `LinearRegressionModel`
# (the fitted model, needed to load a saved model back)
from pyspark.ml.regression import LinearRegression, LinearRegressionModel

# Split the scaled data 80/20 into train and test sets; the fixed seed keeps
# the split reproducible
train_data, test_data = scaled_df.randomSplit([.8, .2], seed=1234)

# Initialize `lr`
lr = LinearRegression(labelCol="label", maxIter=10, regParam=0.3, elasticNetParam=0.8)

# Fit the data to the model
linearModel = lr.fit(train_data)

# Save the fitted model (previously this called `.save` on `model_1`, a name
# that is never defined in this script)
linearModel.save('my_cal_house_lin_model')  # Ensure correct path here

# Load the fitted model back. A fitted model is read with
# `LinearRegressionModel.load`; `LinearRegression.load` is for the unfitted
# estimator. The result is bound to a name so it can be used afterwards.
loadedModel = LinearRegressionModel.load('my_cal_house_lin_model')  # Ensure correct path here

# Generate predictions
predicted = linearModel.transform(test_data)

# Extract the predictions and the "known" correct labels
predictions = predicted.select("prediction").rdd.map(lambda x: x[0])
labels = predicted.select("label").rdd.map(lambda x: x[0])

# Zip `predictions` and `labels` into a list
predictionAndLabel = predictions.zip(labels).collect()

# Print out first 5 instances of `predictionAndLabel`
predictionAndLabel[:5]