Python LinearRegression.getLabelCol Examples

Programming Language: Python

Namespace/Package Name: pyspark.ml.regression

Class/Type: LinearRegression

Method/Function: getLabelCol

Examples at hotexamples.com: 1

Python LinearRegression.getLabelCol - 1 example found. This is the top-rated real-world Python example of pyspark.ml.regression.LinearRegression.getLabelCol extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

LinearRegression(30)

fit(30)

transform(15)

evaluate(9)

setPredictionCol(6)

save(5)

explainParams(4)

load(3)

setLabelCol(3)

write(3)

setMaxIter(2)

getPredictionCol(1)

copy(1)

getMaxIter(1)

predict(1)

getLabelCol(1)

setFeaturesCol(1)

getAggregationDepth(1)

setParams(1)

fitMultiple(1)

extractParamMap(1)

getRegParam(1)

Example #1

Show file

def pipeline(request):
    """Train a cross-validated linear regression and render its predictions.

    Reads the 'clean' dataset via ``read_df``, casts every column to double
    (filling nulls with 0.0), splits it 70/30 into train/test sets, and fits
    a Spark ML pipeline made of a ``VectorAssembler``, a ``StandardScaler``
    and a ``CrossValidator`` wrapping ``LinearRegression``. The label column
    is ``custom_fields(request)['prediction']``.

    Args:
        request: The incoming request; passed through to ``custom_fields``,
            ``read_df`` and ``render``.

    Returns:
        The result of rendering ``show_predictions.html`` with the context
        keys ``all_data`` (the input data as a pandas DataFrame), ``rmse``
        (RMSE on the held-out test split) and ``predicted`` (the test-set
        predictions as a pandas DataFrame).

    Raises:
        ValueError: If the configured prediction column is not a column of
            the dataset.
    """
    unique_fields = custom_fields(request)
    label_col = unique_fields['prediction']

    # The date column is optional: there may be no CustomFields row at all,
    # in which case there is simply nothing to exclude from the features.
    first_custom_fields = CustomFields.objects.first()
    date_column = (first_custom_fields.date_column
                   if first_custom_fields is not None else None)

    # First, read the data
    data_df = read_df(request, 'clean')
    # The template receives the pandas DataFrame itself; the previous
    # `to_json()` call only built a string that was thrown away.
    json_df = data_df.toPandas()

    # Cast all the columns to numeric
    new_df = data_df.select(
        [col(c).cast("double").alias(c) for c in data_df.columns])
    new_df = new_df.fillna(0.0)
    new_df.show()

    # Split data into training and test sets
    train, test = new_df.randomSplit([0.7, 0.3])

    # Feature Processing: every column except the label (and the date
    # column, when it is present) is used as a feature.
    featuresCols = new_df.columns
    featuresCols.remove(label_col)
    if date_column in featuresCols:
        featuresCols.remove(date_column)

    # This concatenates all feature columns into a single feature vector in a new column 'rawFeatures'
    vectorAssembler = VectorAssembler(inputCols=featuresCols,
                                      outputCol='rawFeatures')

    # Model Training
    standardScaler = StandardScaler(inputCol="rawFeatures",
                                    outputCol="features")
    lr = LinearRegression(labelCol=label_col,
                          maxIter=10,
                          regParam=.01)

    # Model tuning
    paramGrid = ParamGridBuilder() \
        .addGrid(lr.maxIter, [10, 100, 1000]) \
        .addGrid(lr.regParam, [0.1, 0.01]) \
        .addGrid(lr.fitIntercept, [False, True]) \
        .addGrid(lr.elasticNetParam, [0.0, 0.5, 1.0]) \
        .build()

    # We define an evaluation metric.
    # This tells CrossValidator how well we are doing by comparing the true labels with predictions
    evaluator = RegressionEvaluator(metricName="rmse",
                                    labelCol=lr.getLabelCol(),
                                    predictionCol=lr.getPredictionCol())

    # Declare the CrossValidator which runs model tuning for us.
    cv = CrossValidator(estimator=lr,
                        evaluator=evaluator,
                        estimatorParamMaps=paramGrid)

    stages = [vectorAssembler, standardScaler, cv]

    # Train the pipeline. The local is deliberately not called `pipeline`
    # so that it does not shadow this function's own name.
    ml_pipeline = Pipeline(stages=stages)

    model = ml_pipeline.fit(train)
    predictions = model.transform(test)

    rmse = evaluator.evaluate(predictions)
    print("RMSE on our test set is: " + str(rmse))

    predictions.show()

    predicted_df = predictions.toPandas()
    context = {'all_data': json_df, 'rmse': rmse, 'predicted': predicted_df}
    return render(request, 'show_predictions.html', context)