Beispiel #1
0
 def test_linear_regression(self):
     """Verify that a LinearRegression estimator survives a save/load round trip.

     An estimator built with ``maxIter=1`` is saved under a fresh temporary
     directory and loaded back. The test then checks that

     * the loaded instance's ``uid`` equals the ``parent`` of its ``maxIter``
       param, and
     * the default value recorded for ``maxIter`` is the same on the original
       and on the loaded instance.

     The temporary directory is removed even when an assertion fails.
     """
     lr = LinearRegression(maxIter=1)
     path = tempfile.mkdtemp()
     try:
         lr_path = path + "/lr"
         lr.save(lr_path)
         lr2 = LinearRegression.load(lr_path)
         self.assertEqual(lr2.uid, lr2.maxIter.parent,
                          "Loaded LinearRegression instance uid (%s) did not match Param's uid (%s)"
                          % (lr2.uid, lr2.maxIter.parent))
         self.assertEqual(lr._defaultParamMap[lr.maxIter], lr2._defaultParamMap[lr2.maxIter],
                          "Loaded LinearRegression instance default params did not match " +
                          "original defaults")
     finally:
         # Cleanup is best-effort: a failure to delete the scratch directory
         # must not mask the actual test outcome.
         try:
             rmtree(path)
         except OSError:
             pass
Beispiel #2
0
 def test_linear_regression(self):
     """Verify that a LinearRegression estimator survives a save/load round trip.

     An estimator built with ``maxIter=1`` is saved under a fresh temporary
     directory and loaded back. The test then checks that

     * the loaded instance's ``uid`` equals the ``parent`` of its ``maxIter``
       param, and
     * the default value recorded for ``maxIter`` is the same on the original
       and on the loaded instance.

     The temporary directory is removed even when an assertion fails.
     """
     lr = LinearRegression(maxIter=1)
     path = tempfile.mkdtemp()
     try:
         lr_path = path + "/lr"
         lr.save(lr_path)
         lr2 = LinearRegression.load(lr_path)
         self.assertEqual(lr2.uid, lr2.maxIter.parent,
                          "Loaded LinearRegression instance uid (%s) did not match Param's uid (%s)"
                          % (lr2.uid, lr2.maxIter.parent))
         self.assertEqual(lr._defaultParamMap[lr.maxIter], lr2._defaultParamMap[lr2.maxIter],
                          "Loaded LinearRegression instance default params did not match " +
                          "original defaults")
     finally:
         # Cleanup is best-effort: a failure to delete the scratch directory
         # must not mask the actual test outcome.
         try:
             rmtree(path)
         except OSError:
             pass
Beispiel #3
0
def linear_regression():
    """Fit, query, save and reload a weighted LinearRegression on a toy dataset.

    Steps, in order:

    1. Obtain (or create) a SparkSession.
    2. Fit a LinearRegression (normal solver, no regularisation, ``weight``
       as the weight column) on a two-row DataFrame.
    3. Check the prediction for two one-feature inputs, the single
       coefficient and the intercept against their expected values.
    4. Show that ``setParams`` rejects positional arguments.
    5. Save the estimator to ``.//lr`` and the model to ``.//lr_model``
       (relative to the current working directory), reload both and compare
       them with the in-memory objects.

    Returns:
        None.

    Raises:
        AssertionError: if any of the sanity checks does not hold.
    """
    spark = SparkSession \
        .builder \
        .appName("Python Spark SQL basic example") \
        .config("spark.some.config.option", "some-value") \
        .getOrCreate()
    df = spark.createDataFrame([(1.0, 2.0, Vectors.dense(1.0)),
                                (0.0, 2.0, Vectors.sparse(1, [], []))],
                               ["label", "weight", "features"])
    lr = LinearRegression(maxIter=5,
                          regParam=0.0,
                          solver="normal",
                          weightCol="weight")
    model = lr.fit(df)

    # The checks below were bare comparison expressions whose results were
    # thrown away; asserting them makes the expected values enforceable.
    test0 = spark.createDataFrame([(Vectors.dense(-1.0), )], ["features"])
    assert abs(model.transform(test0).head().prediction - (-1.0)) < 0.001
    assert abs(model.coefficients[0] - 1.0) < 0.001
    assert abs(model.intercept - 0.0) < 0.001
    test1 = spark.createDataFrame([(Vectors.sparse(1, [0], [1.0]), )],
                                  ["features"])
    assert abs(model.transform(test1).head().prediction - 1.0) < 0.001

    # setParams forces keyword arguments, so a positional argument raises
    # TypeError. Catch that expected error; left uncaught it would abort the
    # function here and the persistence steps below would never run.
    try:
        lr.setParams("vector")
    except TypeError:
        pass

    # Round-trip the estimator and confirm its maxIter setting is preserved.
    temp_path = "./"
    lr_path = temp_path + "/lr"
    lr.save(lr_path)
    lr2 = LinearRegression.load(lr_path)
    assert lr2.getMaxIter() == 5

    # Round-trip the fitted model and confirm its parameters are preserved.
    model_path = temp_path + "/lr_model"
    model.save(model_path)
    model2 = LinearRegressionModel.load(model_path)
    assert model.coefficients[0] == model2.coefficients[0]
    assert model.intercept == model2.intercept
    model.numFeatures
# LinearRegressionModel is the class that matches what `lr.fit` returns below;
# it is needed to reload the persisted model.
from pyspark.ml.regression import LinearRegressionModel

# Configure the linear regression estimator.
lr = LinearRegression(labelCol="label",
                      maxIter=100,
                      regParam=0.3,
                      elasticNetParam=0.8)

# Fit the estimator on the training data to obtain a model.
linearModel = lr.fit(train_data)

# Score the test dataset with the fitted model.
predicted = linearModel.transform(test_data)

# Extract the predictions and the "known" correct labels as plain values.
predictions = predicted.select("prediction").rdd.map(lambda x: x[0])
labels = predicted.select("label").rdd.map(lambda x: x[0])

# Pair each prediction with its label and bring the pairs to the driver.
predictionAndLabel = predictions.zip(labels).collect()

# First 5 (prediction, label) pairs.
predictionAndLabel[:5]

# Persist the fitted model. The original saved `lr`, the unfitted estimator,
# so no trained model was actually written.
linearModel.save("/home/hduser/lrm_model.model")

# Reload the persisted model with the matching model class. The original used
# LogisticRegressionModel, which cannot read a linear regression model.
sameModel = LinearRegressionModel.load("/home/hduser/lrm_model.model")

# Root mean squared error reported by the model summary.
linearModel.summary.rootMeanSquaredError
Beispiel #5
0
# Train the model. Note that `regressor` is rebound from the estimator to the
# fitted model, which is what every statement below operates on.
regressor = LinearRegression(featuresCol='features', labelCol='Close')
regressor = regressor.fit(train_data)
# Fitted coefficients
print(regressor.coefficients)
# Fitted intercept
print(regressor.intercept)

pred_results = regressor.evaluate(test_data)

# show() prints the table itself and returns None; wrapping it in print()
# emitted a stray "None" line after the table.
pred_results.predictions.show()

from pyspark.ml.evaluation import RegressionEvaluator
# Report RMSE and the coefficient of determination from the model summary.
try:
    trainingSummary = regressor.summary
    print("RMSE: %f" % trainingSummary.rootMeanSquaredError)
    print("r2: %f" % trainingSummary.r2)
except Exception as exc:
    # Catch Exception rather than using a bare except, so KeyboardInterrupt
    # and SystemExit still propagate; include the cause in the message.
    print(" Model Test have a Problem: %s" % exc)

# Persist the fitted model
regressor.save("StockPricepred_Model")
print("Succesfully Saved")

#import pickle
#Pkl_Filename = "Regressor_Model"
#with open(Pkl_Filename, 'wb') as f:
#   pickle.dump(regressor, f)
Beispiel #6
0
# Score the test set with the fitted linear model and keep only the
# (prediction, label) columns, in that order.
lm_transform = lm_model_fit.transform(testDf)
results = lm_transform.select(lm_transform['prediction'], lm_transform['label'])

# Mean squared error: sum of squared residuals over the number of scored rows.
# Rows are indexed positionally (0 = prediction, 1 = label) because tuple
# parameters in lambdas are Python 2 only, and `.rdd.map` is used because
# DataFrame.map is not available on newer Spark versions.
MSE = (results.rdd
       .map(lambda row: (row[0] - row[1]) ** 2)
       .reduce(lambda x, y: x + y) / results.count())
print("Linear Regression testing Mean Squared Error = " + str(MSE))

res = results.collect()
# Build explicit (prediction, label) pairs; relying on asDict().values()
# does not guarantee which of the two comes first.
predsAndLabels = sc.parallelize(
    [(float(row['prediction']), float(row['label'])) for row in res])
metrics = RegressionMetrics(predsAndLabels)


print(metrics.meanSquaredError)
print(metrics.rootMeanSquaredError)
print(metrics.r2)
print(metrics.explainedVariance)

# NOTE(review): `lm_model` is defined outside this excerpt and save(sc, path)
# looks like the mllib-style signature - confirm this is the intended object.
lm_model.save(sc, "LinerRegressionModel")



# LASSO
# NOTE(review): in Spark ML elasticNetParam=0.0 selects a pure L2 (ridge)
# penalty and 1.0 selects L1 (LASSO) - confirm which one is intended here.

lasso_model = LinearRegression(featuresCol="features", predictionCol="prediction",
                               maxIter=100, regParam=1.0, elasticNetParam=0.0, tol=1e-6)
lasso_model_fit = lasso_model.fit(trainDf)

# Training-set error. The denominator must be the row count of the frame being
# scored (`lasso_results`), not of the earlier test-set `results`.
lasso_transform = lasso_model_fit.transform(trainDf)
lasso_results = lasso_transform.select(lasso_transform['prediction'], lasso_transform['label'])
lasso_MSE = (lasso_results.rdd
             .map(lambda row: (row[0] - row[1]) ** 2)
             .reduce(lambda x, y: x + y) / lasso_results.count())
print("LASSO training Mean Squared Error = " + str(lasso_MSE))

# Test-set error, computed the same way on the held-out data.
lasso_transform = lasso_model_fit.transform(testDf)
lasso_results = lasso_transform.select(lasso_transform['prediction'], lasso_transform['label'])
lasso_MSE = (lasso_results.rdd
             .map(lambda row: (row[0] - row[1]) ** 2)
             .reduce(lambda x, y: x + y) / lasso_results.count())
Beispiel #7
0
# Score the test set with the fitted linear model and keep only the
# (prediction, label) columns, in that order.
lm_transform = lm_model_fit.transform(testDf)
results = lm_transform.select(lm_transform['prediction'], lm_transform['label'])

# Mean squared error: sum of squared residuals over the number of scored rows.
# Rows are indexed positionally (0 = prediction, 1 = label) because tuple
# parameters in lambdas are Python 2 only, and `.rdd.map` is used because
# DataFrame.map is not available on newer Spark versions.
MSE = (results.rdd
       .map(lambda row: (row[0] - row[1]) ** 2)
       .reduce(lambda x, y: x + y) / results.count())
print("Linear Regression testing Mean Squared Error = " + str(MSE))

res = results.collect()
# Build explicit (prediction, label) pairs; relying on asDict().values()
# does not guarantee which of the two comes first.
predsAndLabels = sc.parallelize(
    [(float(row['prediction']), float(row['label'])) for row in res])
metrics = RegressionMetrics(predsAndLabels)


print(metrics.meanSquaredError)
print(metrics.rootMeanSquaredError)
print(metrics.r2)
print(metrics.explainedVariance)

# NOTE(review): `lm_model` is defined outside this excerpt and save(sc, path)
# looks like the mllib-style signature - confirm this is the intended object.
lm_model.save(sc, "LinerRegressionModel")



# LASSO
# NOTE(review): in Spark ML elasticNetParam=0.0 selects a pure L2 (ridge)
# penalty and 1.0 selects L1 (LASSO) - confirm which one is intended here.

lasso_model = LinearRegression(featuresCol="features", predictionCol="prediction",
                               maxIter=100, regParam=1.0, elasticNetParam=0.0, tol=1e-6)
lasso_model_fit = lasso_model.fit(trainDf)

# Training-set error. The denominator must be the row count of the frame being
# scored (`lasso_results`), not of the earlier test-set `results`.
lasso_transform = lasso_model_fit.transform(trainDf)
lasso_results = lasso_transform.select(lasso_transform['prediction'], lasso_transform['label'])
lasso_MSE = (lasso_results.rdd
             .map(lambda row: (row[0] - row[1]) ** 2)
             .reduce(lambda x, y: x + y) / lasso_results.count())
print("LASSO training Mean Squared Error = " + str(lasso_MSE))

# Test-set error, computed the same way on the held-out data.
lasso_transform = lasso_model_fit.transform(testDf)
lasso_results = lasso_transform.select(lasso_transform['prediction'], lasso_transform['label'])
lasso_MSE = (lasso_results.rdd
             .map(lambda row: (row[0] - row[1]) ** 2)
             .reduce(lambda x, y: x + y) / lasso_results.count())