Python LinearRegression.evaluate Beispiele

Programmiersprache: Python

Namespace / Paketname: pyspark.ml.regression

Klasse / Typ: LinearRegression

Methode / Funktion: evaluate

Beispiele auf hotexamples.com: 9

Python LinearRegression.evaluate - 9 Beispiele gefunden. Dies sind die am besten bewerteten Python-Beispiele für die Methode pyspark.ml.regression.LinearRegression.evaluate, die aus Open-Source-Projekten extrahiert wurden. Sie können Beispiele bewerten, um die Qualität der Beispiele zu verbessern.

Häufig verwendete Methoden

Anzeigen Verbergen

LinearRegression(30)

fit(30)

transform(15)

evaluate(9)

setPredictionCol(6)

save(5)

explainParams(4)

load(3)

setLabelCol(3)

write(3)

setMaxIter(2)

getPredictionCol(1)

copy(1)

getMaxIter(1)

predict(1)

getLabelCol(1)

setFeaturesCol(1)

getAggregationDepth(1)

setParams(1)

fitMultiple(1)

extractParamMap(1)

getRegParam(1)

Beispiel #1

Datei anzeigen

Datei: main.py Projekt: bharathulaprasad/pythonProject

def predictionexample():
    """Fit a linear regression on the e-commerce customers CSV and show test predictions.

    Reads ``Ecommerce_Customers.csv`` (header row, inferred schema), assembles
    four numeric columns into a single feature vector, splits the rows 80/20
    at random, fits a ``LinearRegression`` on the training part and prints up
    to 40 rows of the predictions obtained on the held-out part.
    """
    session = SparkSession.builder.appName('Customers').getOrCreate()
    customers = session.read.csv(
        "Ecommerce_Customers.csv", inferSchema=True, header=True)

    # Pack the predictor columns into one vector column for Spark ML.
    predictor_columns = [
        "Avg Session Length",
        "Time on App",
        "Time on Website",
        "Length of Membership",
    ]
    assembler = VectorAssembler(inputCols=predictor_columns,
                                outputCol="Independent Features")
    assembled = assembler.transform(customers)
    assembled.show()

    # Only the feature vector and the label are needed from here on.
    model_input = assembled.select("Independent Features",
                                   "Yearly Amount Spent")
    model_input.show()

    # Random 80% / 20% train / test split.
    train_data, test_data = model_input.randomSplit([0.80, 0.20])

    estimator = LinearRegression(featuresCol='Independent Features',
                                 labelCol='Yearly Amount Spent')
    fitted_model = estimator.fit(train_data)

    # evaluate() returns a summary whose .predictions holds the scored test rows.
    test_summary = fitted_model.evaluate(test_data)
    test_summary.predictions.show(40)

Beispiel #2

Datei anzeigen

def train_and_pred(train, test_data, tech_only=False):
    """Fit a linear regression on ``train`` and score every entry of ``test_data``.

    Args:
        train: Training data handed to ``LinearRegression.fit``; it must
            provide the ``scaledFeatures`` column and the ``TARGET`` column.
        test_data: Container that is iterated for company keys and indexed
            by those keys; each value is passed to the fitted model
            (presumably a dict of Spark DataFrames -- confirm with callers).
        tech_only: Value written verbatim into the ``Tech_Only_Pred`` column
            of every result.

    Returns:
        dict mapping each company key to a pandas DataFrame of predictions
        with the extra columns ``Instrument``, ``Error_Pct``,
        ``Tech_Only_Pred`` and ``row_num``.
    """
    estimator = LinearRegression(featuresCol='scaledFeatures',
                                 labelCol=TARGET,
                                 maxIter=300,
                                 regParam=1,
                                 elasticNetParam=1)
    lr_model = estimator.fit(train)
    print('Coefficients: {}'.format(str(lr_model.coefficients)))
    print('Intercept: {}'.format(str(lr_model.intercept)))

    # Report the fit quality on the training data.
    fit_summary = lr_model.summary
    print('Training r2 = {}'.format(float(fit_summary.r2)))
    print('Training RMSE = {}\n'.format(
        float(fit_summary.rootMeanSquaredError)))

    results = {}
    for company in test_data:
        company_df = test_data[company]
        scored = lr_model.transform(company_df)

        # r2 comes from a RegressionEvaluator on the scored rows ...
        r2_evaluator = RegressionEvaluator(predictionCol='prediction',
                                           labelCol=TARGET,
                                           metricName='r2')
        company_r2 = r2_evaluator.evaluate(scored)
        print('{}, testing r2 = {}'.format(company.upper(), company_r2))

        # ... while RMSE is read from the model's own evaluation summary.
        company_summary = lr_model.evaluate(company_df)
        print('{}, testing RMSE = {}\n'.format(
            company.upper(), company_summary.rootMeanSquaredError))

        # Drop the feature vector and attach the bookkeeping columns.
        enriched = (
            scored.drop('scaledFeatures')
            .withColumn('Instrument', lit(company))
            .withColumn('Error_Pct',
                        error_pct_udf(array(TARGET, 'prediction')))
            .withColumn('Tech_Only_Pred', lit(tech_only))
        )

        as_pandas = enriched.toPandas().reset_index()
        results[company] = as_pandas.rename(columns={'index': 'row_num'})

    return results

Beispiel #3

Datei anzeigen

Datei: 643 training code.py Projekt: hp482/cs643-project2

# Run the validation set through `assembler` (defined above this excerpt).
valid_output = assembler.transform(validationdataset)

# Keep the assembled "Attributes" column plus the validation set's column at
# index 11, which matches the label column position used for training below.
valid_finalized_data = valid_output.select("Attributes",
                                           validationdataset.columns[11])
valid_finalized_data.show()

# 80/20 split train / test
train_data, test_data = finalized_data.randomSplit([0.8, 0.2])
regressor = LinearRegression(featuresCol='Attributes',
                             labelCol=dataset.columns[11])

# Train model with the training split; `regressor` is rebound from the
# estimator to the fitted model returned by fit().
regressor = regressor.fit(train_data)

# Evaluate the fitted model on the held-out test split.
pred = regressor.evaluate(test_data)

# Show the predictions produced by that evaluation.
pred.predictions.show()

# Score the separate validation data with the fitted model.
predictions = regressor.transform(valid_finalized_data)
predictions.show()

# Show the number of rows per distinct value of the "quality" column.
dataset.groupby("quality").count().show()

# ################################################################################################################
# export the trained model and create a zip file for ease of download
# NOTE(review): only the save step is visible here; the zip step mentioned
# above (presumably using shutil) is not part of this excerpt.
import shutil
from pyspark.ml.regression import LinearRegressionModel
# Persist the fitted model under "cs643", replacing any existing save there.
regressor.write().overwrite().save("cs643")

Beispiel #4

Datei anzeigen

# DataFrame.show() prints the rows itself and returns None, so it is called
# directly; wrapping it in print() only emitted an extra "None" line.
output.show()

# Keep only the feature vector and the label column.
finalized_data = output.select("features", "Close")

finalized_data.show()
# Split the dataset at random in the ratio 8:2.
train_data, test_data = finalized_data.randomSplit([0.80, 0.20])
# Train the model; `regressor` is rebound from the estimator to the fitted model.
regressor = LinearRegression(featuresCol='features', labelCol='Close')
regressor = regressor.fit(train_data)
# Fitted coefficients
print(regressor.coefficients)
# Fitted intercept
print(regressor.intercept)

# Evaluate on the held-out split and show the scored rows.
pred_results = regressor.evaluate(test_data)

pred_results.predictions.show()

from pyspark.ml.evaluation import RegressionEvaluator
# Report RMSE and the coefficient of determination from the training summary.
try:
    # training Summary
    trainingSummary = regressor.summary
    print("RMSE: %f" % trainingSummary.rootMeanSquaredError)
    print("r2: %f" % trainingSummary.r2)
except Exception:
    # Best-effort reporting: a missing or failing summary must not abort the
    # script, but KeyboardInterrupt / SystemExit are no longer swallowed.
    print(" Model Test have a Problem")

# Save the fitted model.
regressor.save("StockPricepred_Model")

Beispiel #5

Datei anzeigen

Datei: RegressionPredTemperature.py Projekt: garbamoussa/Hackathon-for-Social-Good

# Configure the estimator, then rebind `regressor` to the model fitted on
# train_data (features in 'Features', label in 't2mTemp').
regressor = LinearRegression(featuresCol='Features', labelCol='t2mTemp')
regressor = regressor.fit(train_data)

# COMMAND ----------

# DBTITLE 1,Regression Coefficients
# Bare expression: its value is only visible when this runs as a notebook cell.
regressor.coefficients

# COMMAND ----------

# Bare expression, same as above, for the fitted intercept.
regressor.intercept

# COMMAND ----------

# DBTITLE 1,Evaluate model with test data
# Evaluate the fitted model on two datasets: test_data and finalized_dataTest
# (both defined outside this excerpt).
pred_results = regressor.evaluate(test_data)
pred_resultsTest = regressor.evaluate(finalized_dataTest)

# COMMAND ----------

#pred_results.predictions.show()

# COMMAND ----------

# DBTITLE 1,Predicted temperature
# NOTE(review): `display` is not defined in this excerpt -- presumably the
# notebook environment's built-in table renderer; confirm.
display(pred_resultsTest.predictions)

# COMMAND ----------

import numpy as np
import matplotlib.pyplot as plt

Beispiel #6

Datei anzeigen

Datei: traintest.py Projekt: KKGITHUBNET/Cloud-Computing

##### Random Forest  Ends #####


# In[108]:


#### Linear Regression  #####
# Configure the estimator, then rebind `regressor` to the model fitted on
# `train` (features in "independentFeatures", label in "quality").
regressor = LinearRegression(featuresCol="independentFeatures",labelCol="quality")
regressor=regressor.fit(train)


# In[109]:


# Evaluate the fitted model on `val`; this returns an evaluation summary.
predResults = regressor.evaluate(val)


# In[110]:


# Rebind predResults from the summary to its predictions attribute.
predResults = predResults.predictions


# In[111]:


# Persist the fitted model under "lrModel", replacing any existing save there.
regressor.write().overwrite().save("lrModel")


# In[112]:

Beispiel #7

Datei anzeigen

Datei: main.py Projekt: dextar47/covid-19-analysis

def main():
    """Fit a linear trend to Australia's confirmed COVID-19 counts and plot it.

    Loads ``time_series_19-covid-Confirmed_archived_0325.csv``, keeps the
    rows for Australia, sums the remaining date columns into one series,
    fits a ``LinearRegression`` of infections against the day index on the
    rows with index below 50, evaluates it on the rows with index above 49,
    prints the model and training metrics, and plots actual versus predicted
    values for the evaluated rows.
    """
    # making sparksession object
    conf = SparkConf().setAppName('Covid-19')
    sc = SparkContext(conf=conf)
    sc.setLogLevel("ERROR")
    spark = SparkSession(sc)

    # load data into spark dataframe
    ####################################
    df = spark.read.format("csv").option(
        "header",
        "true").load("time_series_19-covid-Confirmed_archived_0325.csv")

    # prepare data
    ####################################
    df = df.filter(F.col("Country/Region") == "Australia")
    columns_to_drop = [
        'Country/Region', 'Province/State', 'Lat', 'Long', '1/22/20',
        '1/23/20', '1/24/20', '1/25/20', '1/26/20', '1/27/20', '1/28/20',
        '1/29/20', '1/30/20', '1/31/20'
    ]  # '3/22/20', '3/23/20' will be the test data, will remove bit later
    df = df.drop(*columns_to_drop)
    # sum up all rows data into 1 row; each date column is renamed to its
    # positional index so it can later be cast to a number
    df = df.select([
        F.sum(value).alias(str(index))
        for index, value in enumerate(df.columns)
    ])
    # transpose the dataframe: one row per day, the day index in 'index' and
    # the summed count in the column that Spark sees as "0".
    # (The former `df_p.rename(columns={0: 'Infections'})` discarded its
    # result and was a no-op; the column is renamed by the alias below.)
    df_p = df.toPandas().transpose().reset_index()
    df_s = spark.createDataFrame(df_p)
    df_s = df_s.select(F.col('index'), F.col("0").alias("Infections"))

    # linear regression
    ####################################
    df_s = df_s.withColumn("index_double", df_s['index'].cast(DoubleType()))
    featureassembler = VectorAssembler(inputCols=["index_double"],
                                       outputCol="new_index")
    output = featureassembler.transform(df_s)
    full_data = output.select("new_index", "Infections")
    # Chronological split: indices 0..49 train, 50 and above test.
    test_data = full_data.where(F.col('index_double') > 49)
    train_data = full_data.where(F.col('index_double') < 50)
    train_data.show(50)
    test_data.show()
    regressor = LinearRegression(featuresCol='new_index',
                                 labelCol='Infections')
    regressor = regressor.fit(train_data)
    pred_results = regressor.evaluate(test_data)
    pred_results.predictions.show(60)
    print("Coefficients: " + str(regressor.coefficients))
    print("Intercept: " + str(regressor.intercept))

    trainingSummary = regressor.summary
    print("numIterations: %d" % trainingSummary.totalIterations)
    print("RMSE: %f" % trainingSummary.rootMeanSquaredError)
    print("r2: %f" % trainingSummary.r2)

    # visualize results
    ####################################
    actual_values = pred_results.predictions.select('new_index',
                                                    'Infections').collect()
    predicted_values = pred_results.predictions.select('new_index',
                                                       'prediction').collect()

    def dfToList(rows):
        """Turn collected rows into lists, unwrapping one-element vectors."""
        return [
            [
                val.values[0] if isinstance(val, DenseVector) else val
                for val in row
            ]
            for row in rows
        ]

    av_list = dfToList(actual_values)
    pv_list = dfToList(predicted_values)
    x1 = [c[0] for c in av_list]
    y1 = [c[1] for c in av_list]
    # Plot from the unwrapped list so x2 holds plain floats, not DenseVectors.
    x2 = [c[0] for c in pv_list]
    y2 = [c[1] for c in pv_list]
    plt.plot(x1, y1)
    plt.plot(x2, y2)
    plt.show()

Beispiel #8

Datei anzeigen

Datei: stock_lrr.py Projekt: BaansiShah/internship

                                   outputCol="Independent Features")

# Build the feature-vector column (featureassembler is configured above this
# excerpt).
output = featureassembler.transform(dataset)

# Keep only the feature vector and the label, then split 75/25 at random.
finalized_data = output.select("Independent Features", "Close")
train_data, test_data = finalized_data.randomSplit([0.75, 0.25])
regressor = LinearRegression(featuresCol='Independent Features',
                             labelCol='Close')
# `regressor` is rebound from the estimator to the fitted model.
regressor = regressor.fit(train_data)
predictions = regressor.transform(test_data)
#predictions.show()


# r2 is computed by a RegressionEvaluator on the scored rows; the error
# metrics below are read from the model's own evaluation summary.
lr_evaluator = RegressionEvaluator(predictionCol="prediction",
                                   labelCol="Close", metricName="r2")
test_result = regressor.evaluate(test_data)

print("R Squared (R2) = %g" % lr_evaluator.evaluate(predictions))
print("Root Mean Squared Error (RMSE) = %g" % test_result.rootMeanSquaredError)
print("Mean Absolute Error = %g" % test_result.meanAbsoluteError)
print("Mean Squared Error = %g" % test_result.meanSquaredError)

# `predictions` still carries the 'Close' label next to 'prediction', so both
# series are collected in a single pass. This keeps actual and predicted
# values aligned row by row and avoids running a second Spark job.
results_pdf = predictions.select('Close', 'prediction').toPandas()
actual = results_pdf['Close'].values.tolist()
predicted = results_pdf['prediction'].values.tolist()

plt.figure(figsize=(20, 10))
plt.plot(actual, label='Actual', color='green')
plt.plot(predicted, color='red', label='Predicted')
plt.legend(loc="upper left")
# Elapsed time since `earlier`, which is set above this excerpt.
now = datetime.datetime.now()
diff = now - earlier

Beispiel #9

Datei anzeigen

Datei: linear_regression.py Projekt: sahilsingh1123/predictive_analysis_git

        print("coefficient : " + str(regressor.coefficients))

        coefficents_m = str(regressor.coefficients)

        print("intercept : " + str(regressor.intercept))

        intercept_b = regressor.intercept

        #
        # plt.plot(output_features, output_label)
        # plt.plot(output_features, intercept_b + coefficents_m*output_features, "-")
        # plt.show()
        #

        prediction_va = regressor.evaluate(test_data)

        prediction_val = prediction_va.predictions
        prediction_val.show()

        #############################################################################################################

        prediction_val_pand = prediction_val.select("MPG",
                                                    "prediction").toPandas()

        prediction_val_pand_sprk = spark.createDataFrame(prediction_val_pand)
        print(type(prediction_val_pand_sprk))
        # prediction_val_pand_sprk.write.csv('/home/fidel/PycharmProjects/predictive_analysis_git', header=True, mode='append')

        prediction_val_pand = prediction_val_pand.assign(
            residual_vall=prediction_val_pand["MPG"] -