Beispiel #1
0
def isotonic_regression(trainingDataFrame):
    """Fit a default-configured isotonic regression and summarize the result.

    The estimator is created with no arguments and fitted on
    ``trainingDataFrame``.

    Returns a dict with three entries:
        "model"       -- the fitted model object
        "boundaries"  -- the model's ``boundaries`` attribute
        "predictions" -- the model's ``predictions`` attribute
    """
    fitted = IsotonicRegression().fit(trainingDataFrame)
    return {
        "model": fitted,
        "boundaries": fitted.boundaries,
        "predictions": fitted.predictions,
    }
Beispiel #2
0
def isotonicRegression(df, conf):
    """Train an isotonic regression model configured from ``conf["params"]``.

    Each setting is looked up in ``conf["params"]`` and falls back to a
    default when the key is absent:

        featuresCol   -> "features"
        labelCol      -> "label"
        predictionCol -> "prediction"
        isotonic      -> True
        featureIndex  -> 0

    Input  : - training dataframe (df)
             - configuration mapping holding a "params" entry (conf)
    Output : - fitted isotonic regression model
    """
    params = conf["params"]
    estimator = IsotonicRegression(
        featuresCol=params.get("featuresCol", "features"),
        labelCol=params.get("labelCol", "label"),
        predictionCol=params.get("predictionCol", "prediction"),
        isotonic=params.get("isotonic", True),
        featureIndex=params.get("featureIndex", 0),
    )
    return estimator.fit(df)
Beispiel #3
0
    def calibrate(self, df_to_calibrate):
        """Fit an isotonic calibration model and store it on ``self.ir_model``.

        Steps:
          1. Run ``self.predict`` on ``df_to_calibrate`` (presumably this is
             what populates ``self.pred_df`` -- verify against ``predict``).
          2. Assemble the ``prob_deact`` column of ``self.pred_df`` into the
             vector column ``prob_feature``.
          3. Fit an isotonic regression from ``prob_feature`` to the
             ``deactivated`` label; its output column is ``prob_deact_cal``.

        Nothing is returned; the fitted model is kept in ``self.ir_model``.
        """
        # Initial (uncalibrated) prediction on the calibration data set.
        self.predict(df_to_calibrate, 'calibration', False, False, False)
        print('[ {0} ] : Calibrating model'.format(datetime.utcnow()))

        # The raw probability becomes the single input feature of the calibrator.
        assembler = VectorAssembler(
            inputCols=['prob_deact'],
            outputCol='prob_feature',
            handleInvalid='keep',
        )
        calibration_input = assembler.transform(self.pred_df)

        calibrator = IsotonicRegression(
            featuresCol='prob_feature',
            labelCol='deactivated',
            predictionCol='prob_deact_cal',
        )
        self.ir_model = calibrator.fit(calibration_input)
    def Isoton_Regression(df, conf):
        """Fit an isotonic regression model, optionally via cross validation.

        Args:
            df: Training dataframe passed to ``fit``.
            conf: Configuration mapping. ``conf["crossval"]["crossval"]``
                selects whether the estimator is wrapped in a
                ``CrossValidator``; any falsy value (including a missing
                key) means a plain fit.

        Returns:
            The result of ``CrossValidator.fit`` when cross validation is
            enabled, otherwise the result of ``IsotonicRegression.fit``.
        """
        ir = IsotonicRegression()

        # Plain fit unless cross validation is explicitly switched on. Using a
        # truthiness test guarantees one of the two branches always runs, so a
        # model is returned for every configuration value.
        if not conf["crossval"].get("crossval"):
            return ir.fit(df)

        # No parameters are added to the grid, so only the estimator's default
        # configuration is scored across the folds.
        grid = ParamGridBuilder().build()
        evaluator = RegressionEvaluator(metricName="r2")
        cv = CrossValidator(estimator=ir,
                            estimatorParamMaps=grid,
                            evaluator=evaluator,
                            parallelism=2)
        return cv.fit(df)
def isotonicRegression(df, label, features, adjust):
    """Fit an isotonic regression on ``df`` and report its RMSE and predictions.

    Args:
        df: Input dataframe.
        label: Name of the label column.
        features: Names of the columns to assemble into the feature vector.
            The sequence is copied and never modified.
        adjust: Names of non-numerical columns. Each one is string-indexed
            into a new ``"<name>_num"`` column that is added to the features.

    Returns:
        A dict with
            "RMSE"        -- root mean squared error of the predictions
            "predictions" -- list of the values in the "prediction" column

    Note:
        The model is evaluated on the same dataframe it was fitted on, so
        the reported RMSE is a training error.
    """
    # Work on a copy so the caller's ``features`` list is not extended with
    # the generated "<name>_num" columns (which would pile up across calls).
    feature_cols = list(features)

    # Columns with non-numerical values are converted to numeric indices.
    for name in adjust:
        indexed_name = "{}_num".format(name)
        indexer = StringIndexer(inputCol=name, outputCol=indexed_name)
        df = indexer.fit(df).transform(df)
        feature_cols.append(indexed_name)

    # Assemble the feature vector expected by the model.
    assembler = VectorAssembler(inputCols=feature_cols, outputCol="features")
    assembled = assembler.transform(df)

    ir = IsotonicRegression(featuresCol='features', labelCol=label)
    predictions = ir.fit(assembled).transform(assembled)

    # Evaluator required for the RMSE estimation.
    evaluator = RegressionEvaluator(labelCol=label, metricName="rmse")
    rmse = evaluator.evaluate(predictions)

    return {
        "RMSE": rmse,
        "predictions": [
            row["prediction"]
            for row in predictions.select("prediction").collect()
        ],
    }
Beispiel #6
0
 def binomialSparkIsoRegression(self):
     """Fit a default ``IsotonicRegression`` on ``self.Xtrain`` and ``self.Ytrain``.

     Both attributes are passed positionally to ``fit`` and the fitted
     result is returned unchanged.

     NOTE(review): if ``IsotonicRegression`` is the pyspark.ml estimator, the
     second positional argument of ``fit`` is ``params`` rather than labels --
     confirm which library this class comes from.
     """
     return IsotonicRegression().fit(self.Xtrain, self.Ytrain)
Beispiel #7
0
 def scalarSparkIsoRegression(self):
     """Fit a default ``IsotonicRegression`` on ``self.train`` and return the result."""
     return IsotonicRegression().fit(self.train)
Beispiel #8
0
    # NOTE(review): this fragment starts mid-function. `glr`, `iso`, `X_train`,
    # `X_test` and `spark` are defined outside the visible code -- presumably
    # two estimators, the train/test dataframes and the active SparkSession.

    # --- First estimator (`glr`): fit, then dump train predictions to CSV ---
    model = glr.fit(X_train)
    train_pred = model.transform(X_train)
    trpred = train_pred.toPandas()
    trpred.to_csv('train_pred_gr.csv')

    # Test-set predictions for `glr`, also written to CSV.
    pred = model.transform(X_test)
    tstpred = pred.toPandas()
    tstpred.to_csv('test_pred_gr.csv')

    # RMSE of the `glr` predictions on the test data.
    evaluator = RegressionEvaluator(labelCol="label",
                                    predictionCol="prediction",
                                    metricName="rmse")
    rmse = evaluator.evaluate(pred)
    print("Root Mean Squared Error (RMSE) on test data for GLR = %g" % rmse)

    # --- Second estimator (`iso`): same steps; `model`, `pred` and the other
    # names above are rebound, so the `glr` results are no longer reachable ---
    model = iso.fit(X_train)
    train_pred = model.transform(X_train)
    trpred = train_pred.toPandas()
    trpred.to_csv('train_pred_iso.csv')

    # Test-set predictions for `iso`, also written to CSV.
    pred = model.transform(X_test)
    tstpred = pred.toPandas()
    tstpred.to_csv('test_pred_iso.csv')

    # RMSE of the `iso` predictions on the test data (evaluator is rebuilt
    # with the same settings as above).
    evaluator = RegressionEvaluator(labelCol="label",
                                    predictionCol="prediction",
                                    metricName="rmse")
    rmse = evaluator.evaluate(pred)
    print("Root Mean Squared Error (RMSE) on test data for ISO = %g" % rmse)

    # Shut the session down once both models have been evaluated.
    spark.stop()
Beispiel #9
0
# (1)  Import the `IsotonicRegression` class from the regression module.

from pyspark.ml.regression import IsotonicRegression

# (2)  Create an instance of the `IsotonicRegression` class.  Use the same
# features and label that we used for our linear regression model, then print
# the description of the estimator's parameters.

ir = IsotonicRegression(featuresCol="features", labelCol="duration")
print(ir.explainParams())

# (3)  Fit the isotonic regression model on the train data.  It may take a
# minute for the fit to complete.  Note that this will produce an instance of
# the `IsotonicRegressionModel` class.
# NOTE(review): `train` is not defined in this snippet -- presumably the
# training DataFrame prepared earlier in the exercise.
# The bare `type(ir_model)` expression only displays a value in an
# interactive session; in a plain script it has no visible effect.

ir_model = ir.fit(train)
type(ir_model)

# (4)  The model parameters are available in the `boundaries` and `predictions`
# attributes of the isotonic regression model.  Inspect these attributes
# (bare expressions: shown only in an interactive session).

ir_model.boundaries
ir_model.predictions

# (5)  Apply the isotonic regression model to the train data using the
# `transform` method.

predictions_train = ir_model.transform(train)

# (6)  Use the `RegressionEvaluator` to compute the RMSE on the train data.
# NOTE(review): `evaluator` is not defined in this snippet -- presumably a
# `RegressionEvaluator` configured for RMSE earlier in the exercise; confirm.

evaluator.evaluate(predictions_train)