def isotonic_regression(trainingDataFrame):
    """Fit a default-configured isotonic regression and summarise the result.

    Args:
        trainingDataFrame: Training data passed unchanged to
            ``IsotonicRegression().fit``.

    Returns:
        dict: ``"model"`` holds the fitted model; ``"boundaries"`` and
        ``"predictions"`` hold the fitted model's attributes of the same
        names.
    """
    fitted = IsotonicRegression().fit(trainingDataFrame)
    return {
        "model": fitted,
        "boundaries": fitted.boundaries,
        "predictions": fitted.predictions,
    }
def isotonicRegression(df, conf):
    """Train an isotonic regression model configured from ``conf``.

    Args:
        df: Training dataframe handed to ``IsotonicRegression.fit``.
        conf: Mapping with a ``"params"`` entry. The optional keys read
            from ``conf["params"]`` and their fallbacks are:
            ``featuresCol`` ("features"), ``labelCol`` ("label"),
            ``predictionCol`` ("prediction"), ``isotonic`` (True) and
            ``featureIndex`` (0).

    Returns:
        The fitted isotonic regression model.
    """
    params = conf["params"]
    estimator = IsotonicRegression(
        featuresCol=params.get("featuresCol", "features"),
        labelCol=params.get("labelCol", "label"),
        predictionCol=params.get("predictionCol", "prediction"),
        isotonic=params.get("isotonic", True),
        featureIndex=params.get("featureIndex", 0),
    )
    return estimator.fit(df)
def calibrate(self, df_to_calibrate):
    """Fit an isotonic calibration model on the calibration data set.

    Runs ``self.predict`` on ``df_to_calibrate``, wraps the ``prob_deact``
    column of ``self.pred_df`` into a ``prob_feature`` vector column, fits
    an ``IsotonicRegression`` of ``deactivated`` on that feature, and
    stores the fitted model on ``self.ir_model``.

    Args:
        df_to_calibrate: Data set forwarded to ``self.predict``.

    Returns:
        None. The fitted model is available as ``self.ir_model``.
    """
    # Initial (uncalibrated) prediction pass.
    # NOTE(review): presumably this call is what populates self.pred_df,
    # which is read below -- confirm against predict().
    self.predict(df_to_calibrate, 'calibration', False, False, False)

    print('[ {0} ] : Calibrating model'.format(datetime.utcnow()))

    # The estimator takes its input from a features column, so the raw
    # probability is assembled into a one-element vector first.
    assembler = VectorAssembler(
        inputCols=['prob_deact'],
        outputCol='prob_feature',
        handleInvalid='keep',
    )
    calibration_input = assembler.transform(self.pred_df)

    # Fit the calibration function and keep it on the instance.
    calibrator = IsotonicRegression(
        labelCol='deactivated',
        predictionCol='prob_deact_cal',
        featuresCol='prob_feature',
    )
    self.ir_model = calibrator.fit(calibration_input)
def Isoton_Regression(df, conf):
    """Train an isotonic regression model, optionally through cross-validation.

    Args:
        df: Spark dataframe used for training.
        conf: Configuration mapping. ``conf["crossval"]["crossval"]``
            selects the training mode: a truthy value wraps the estimator
            in a ``CrossValidator``; a falsy or missing value fits the
            estimator directly.

    Returns:
        The fitted model: a cross-validator model when cross-validation is
        enabled, otherwise the isotonic regression model itself.
    """
    # NOTE: conf["params"] is not read here; the estimator is built with
    # its default parameters.
    ir = IsotonicRegression()

    if conf["crossval"].get("crossval"):
        # The parameter grid is left empty, so cross-validation evaluates
        # the default estimator configuration only.
        grid = ParamGridBuilder().build()
        evaluator = RegressionEvaluator(metricName="r2")
        cv = CrossValidator(
            estimator=ir,
            estimatorParamMaps=grid,
            evaluator=evaluator,
            parallelism=2,
        )
        # Fit on the dataframe argument (previously the undefined name
        # `training` was used).
        return cv.fit(df)

    # Single fall-through path so a result is always returned, including
    # when the flag is missing or None.
    return ir.fit(df)
def isotonicRegression(df, label, features, adjust):
    """Fit an isotonic regression on ``df`` and report RMSE and predictions.

    Args:
        df: Input dataframe.
        label: Name of the label column.
        features: Names of the numeric feature columns. The list is not
            modified.
        adjust: Names of non-numeric columns. Each one is string-indexed
            into a ``"<name>_num"`` column that is added to the features.

    Returns:
        dict: ``"RMSE"`` is the root mean squared error of the model
        evaluated on the same dataframe it was fitted on;
        ``"predictions"`` is the list of predicted values collected to
        the driver.
    """
    # Work on a copy so the caller's list is not extended with the
    # generated "<name>_num" columns (repeat calls would otherwise
    # accumulate duplicates).
    feature_cols = list(features)

    # Columns with non-numerical values are converted to numeric indices.
    for name in adjust:
        indexed_name = "{}_num".format(name)
        indexer = StringIndexer(inputCol=name, outputCol=indexed_name)
        df = indexer.fit(df).transform(df)
        feature_cols.append(indexed_name)

    # Assemble the feature vector the model consumes.
    assembler = VectorAssembler(inputCols=feature_cols, outputCol="features")
    assembled = assembler.transform(df)

    # NOTE(review): per the PySpark docs, IsotonicRegression fits a single
    # feature, selected by `featureIndex` (default 0) when featuresCol is
    # a vector -- confirm that using only the first feature is intended.
    ir = IsotonicRegression(featuresCol="features", labelCol=label)
    ir_model = ir.fit(assembled)
    predictions = ir_model.transform(assembled)

    # Evaluator required for the RMSE estimate.
    evaluator = RegressionEvaluator(labelCol=label, metricName="rmse")
    rmse = evaluator.evaluate(predictions)

    return {
        "RMSE": rmse,
        "predictions": [
            row["prediction"]
            for row in predictions.select("prediction").collect()
        ],
    }
def binomialSparkIsoRegression(self):
    """Fit a default ``IsotonicRegression`` on ``self.Xtrain`` / ``self.Ytrain``.

    Returns:
        The object returned by ``fit``.
    """
    # NOTE(review): if IsotonicRegression is the pyspark.ml estimator, the
    # second positional argument of fit() is a params map, not a label
    # set -- confirm this (X, y) call is what the library in use expects.
    return IsotonicRegression().fit(self.Xtrain, self.Ytrain)
def scalarSparkIsoRegression(self):
    """Fit a default ``IsotonicRegression`` on ``self.train``.

    Returns:
        The fitted model returned by ``fit``.
    """
    return IsotonicRegression().fit(self.train)
# Script excerpt: fit two regressors on the same split, dump their
# predictions to CSV, and print the test RMSE of each.
# `glr`, `iso`, `X_train`, `X_test` and `spark` are defined outside this
# excerpt. NOTE(review): presumably `glr` is a GeneralizedLinearRegression
# and `iso` an IsotonicRegression estimator -- confirm.

# --- First model ("GLR" in the report line) ---
model = glr.fit(X_train)
# Train-set predictions are converted to pandas and written to a CSV
# file in the current working directory.
train_pred = model.transform(X_train)
trpred = train_pred.toPandas()
trpred.to_csv('train_pred_gr.csv')
# Same for the test set.
pred = model.transform(X_test)
tstpred = pred.toPandas()
tstpred.to_csv('test_pred_gr.csv')
# RMSE between the "label" and "prediction" columns of the test predictions.
evaluator = RegressionEvaluator(labelCol="label", predictionCol="prediction", metricName="rmse")
rmse = evaluator.evaluate(pred)
print("Root Mean Squared Error (RMSE) on test data for GLR = %g" % rmse)

# --- Second model ("ISO" in the report line) ---
# `model`, `train_pred`, `trpred`, `pred`, `tstpred`, `evaluator` and
# `rmse` are rebound from here on; the first model's values are discarded.
model = iso.fit(X_train)
train_pred = model.transform(X_train)
trpred = train_pred.toPandas()
trpred.to_csv('train_pred_iso.csv')
pred = model.transform(X_test)
tstpred = pred.toPandas()
tstpred.to_csv('test_pred_iso.csv')
evaluator = RegressionEvaluator(labelCol="label", predictionCol="prediction", metricName="rmse")
rmse = evaluator.evaluate(pred)
print("Root Mean Squared Error (RMSE) on test data for ISO = %g" % rmse)

# Shut the Spark session down once both models have been evaluated.
spark.stop()
# (1) Import the `IsotonicRegression` class from the regression module. from pyspark.ml.regression import IsotonicRegression # (2) Create an instance of the `IsotonicRegression` class. Use the same # features and label that we used for our linear regression model. ir = IsotonicRegression(featuresCol="features", labelCol="duration") print(ir.explainParams()) # (3) Fit the isotonic regression model on the train data. It may take a # minute for the fit to complete. Note that this will produce an instance of # the `IsotonicRegressionModel` class. ir_model = ir.fit(train) type(ir_model) # (4) The model parameters are available in the `boundaries` and `predictions` # attributes of the isotonic regression model. Print these attributes. ir_model.boundaries ir_model.predictions # (5) Apply the isotonic regression model to the train data using the `transform` method. predictions_train = ir_model.transform(train) # (6) Use the `RegressionEvaluator` to compute the RMSE on the train data. evaluator.evaluate(predictions_train)