def test_aft_regression_survival(self):
    """Convert a fitted AFTSurvivalRegression model to ONNX and compare outputs.

    Fits the estimator on a two-row DataFrame, converts the fitted model with
    ``convert_sparkml``, runs the saved ONNX model on the same features and
    checks its ``prediction`` output against the Spark predictions to five
    decimals.
    """
    # One dense and one (empty) sparse single-feature row.
    rows = [
        (1.0, Vectors.dense(1.0), 1.0),
        (1e-40, Vectors.sparse(1, [], []), 0.0),
    ]
    data = self.spark.createDataFrame(rows, ["label", "features", "censor"])
    spark_model = AFTSurvivalRegression().fit(data)

    # The feature vector is the second column of each row.
    n_features = data.first()[1].size
    input_types = [('features', FloatTensorType([1, n_features]))]
    model_onnx = convert_sparkml(
        spark_model,
        'Sparkml AFTSurvivalRegression',
        input_types,
        spark_session=self.spark)
    self.assertTrue(model_onnx is not None)

    # Reference predictions from Spark, plus the same inputs as a float32 array.
    predicted = spark_model.transform(data)
    feature_frame = data.toPandas().features.apply(
        lambda vec: pandas.Series(vec.toArray()))
    data_np = feature_frame.values.astype(numpy.float32)
    expected = [predicted.toPandas().prediction.values.astype(numpy.float32)]

    paths = save_data_models(
        data_np,
        expected,
        spark_model,
        model_onnx,
        basename="SparkmlAFTSurvivalRegression")
    # The fourth returned path is used as the ONNX model file.
    output, _ = run_onnx_model(['prediction'], data_np, paths[3])
    compare_results(expected, output, decimal=5)
예제 #2
0
def survival_regression(trainingDataFrame, quantileProbabilities=[0.3, 0.6],
                        quantilesCol="quantiles"):
    """Fit an AFTSurvivalRegression model and summarize it in a dict.

    Args:
        trainingDataFrame: DataFrame passed to ``fit``.
        quantileProbabilities: Forwarded to the estimator unchanged.
        quantilesCol: Forwarded to the estimator unchanged.

    Returns:
        A dict with the keys ``"model"``, ``"intercept"``, ``"coefficients"``
        and ``"scale"``, the last three read from the fitted model.
    """
    estimator = AFTSurvivalRegression(
        quantileProbabilities=quantileProbabilities,
        quantilesCol=quantilesCol,
    )
    fitted = estimator.fit(trainingDataFrame)
    return {
        "model": fitted,
        "intercept": fitted.intercept,
        "coefficients": fitted.coefficients,
        "scale": fitted.scale,
    }
예제 #3
0
def aftsurvivalRegression(df, conf):
    """Train an AFT survival regression model, optionally with tuning.

    Args:
        df: Training DataFrame.
        conf: Configuration dict.
            ``conf["params"]`` holds the estimator settings (``featuresCol``,
            ``labelCol``, ``predictionCol``, ``censorCol``, ``fitIntercept``,
            ``maxIter``, ``tol``, ``quantileProbabilities``, ``quantilesCol``,
            ``aggregationDepth``); missing keys use the defaults below.
            ``conf["tuning"]`` is ``None``/empty/absent for a plain fit, or a
            dict with ``method`` (``"crossval"`` or ``"trainvalsplit"``),
            ``methodParam`` (number of folds, default 2, or train ratio,
            default 0.8) and ``paramGrids`` (mapping of estimator param name
            to the list of values to try).

    Returns:
        Whatever ``fit`` returns: the fitted estimator model for a plain fit,
        or the fitted CrossValidator / TrainValidationSplit model when tuning.

    Raises:
        ValueError: If tuning is requested with an unsupported ``method``.
    """
    params = conf["params"]
    feature_col = params.get("featuresCol", "features")
    label_col = params.get("labelCol", "label")
    pred_col = params.get("predictionCol", "prediction")
    cens_col = params.get("censorCol", "censor")
    fit_intercept = params.get("fitIntercept", True)
    max_iter = params.get("maxIter", 100)
    # A missing (or null) tolerance must not reach the estimator as None;
    # fall back to 1e-6, the estimator's documented default.
    tol = params.get("tol")
    if tol is None:
        tol = 1e-6
    quant_p = params.get("quantileProbabilities",
                         [0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99])
    quant_col = params.get("quantilesCol", None)
    agg_depth = params.get("aggregationDepth", 2)

    estimator_kwargs = dict(
        featuresCol=feature_col, labelCol=label_col,
        predictionCol=pred_col, censorCol=cens_col,
        maxIter=max_iter, fitIntercept=fit_intercept,
        tol=tol, quantileProbabilities=quant_p,
        aggregationDepth=agg_depth,
    )
    # Only request a quantiles column when one was configured, so the param
    # is never explicitly set to None.
    if quant_col is not None:
        estimator_kwargs["quantilesCol"] = quant_col
    afts = AFTSurvivalRegression(**estimator_kwargs)

    tuning = conf.get("tuning")
    if not tuning:
        # No tuning section (None, empty or absent): plain fit.
        return afts.fit(df)

    method = str(tuning.get("method") or "").lower()
    if method not in ("crossval", "trainvalsplit"):
        raise ValueError(
            "Unsupported tuning method %r; expected 'crossval' or "
            "'trainvalsplit'" % (tuning.get("method"),))

    # Build the grid once for both tuning strategies. Config keys are param
    # names, while addGrid expects Param objects, so resolve them on the
    # estimator first.
    grid_builder = ParamGridBuilder()
    for name, values in (tuning.get("paramGrids") or {}).items():
        param = afts.getParam(name) if isinstance(name, str) else name
        grid_builder.addGrid(param, values)
    grid = grid_builder.build()

    # Evaluate on the same columns the estimator was configured with.
    evaluator = RegressionEvaluator(labelCol=label_col,
                                    predictionCol=pred_col)

    if method == "crossval":
        folds = tuning.get("methodParam", 2)
        tuner = CrossValidator(estimator=afts, estimatorParamMaps=grid,
                               evaluator=evaluator, numFolds=folds)
    else:
        train_ratio = tuning.get("methodParam", 0.8)
        tuner = TrainValidationSplit(estimator=afts, estimatorParamMaps=grid,
                                     evaluator=evaluator,
                                     trainRatio=train_ratio)
    return tuner.fit(df)
예제 #4
0
def AFT(df_data):
    """Fit an AFTSurvivalRegression model on *df_data* and report the time taken.

    The estimator is created with quantile probabilities ``[0.4, 0.5]`` and
    the quantiles column ``"quantiles"``. The elapsed wall-clock time for
    building and fitting it is printed before the fitted model is returned.
    """
    print("Train a AFTSurvivalRegression model...")
    probabilities = [0.4, 0.5]
    started = time.time()

    estimator = AFTSurvivalRegression(
        quantileProbabilities=probabilities,
        quantilesCol="quantiles",
    )
    aft_model = estimator.fit(df_data)

    elapsed = time.time() - started
    print("aft_model using time: %.2fs\n" % elapsed)
    return aft_model
"""

if __name__ == "__main__":
    # Obtain the SparkSession used by this example.
    spark = (
        SparkSession.builder
        .appName("PythonAFTSurvivalRegressionExample")
        .getOrCreate()
    )

    # $example on$
    # Each row is (label, censor, features).
    rows = [
        (1.218, 1.0, Vectors.dense(1.560, -0.605)),
        (2.949, 0.0, Vectors.dense(0.346, 2.158)),
        (3.627, 0.0, Vectors.dense(1.380, 0.231)),
        (0.273, 1.0, Vectors.dense(0.520, 1.151)),
        (4.199, 0.0, Vectors.dense(0.795, -0.226)),
    ]
    training = spark.createDataFrame(rows, ["label", "censor", "features"])

    quantileProbabilities = [0.3, 0.6]
    aft = AFTSurvivalRegression(
        quantileProbabilities=quantileProbabilities,
        quantilesCol="quantiles",
    )
    model = aft.fit(training)

    # Report the fitted coefficients, intercept and scale, then show the
    # transformed training data without truncating column values.
    print("Coefficients: " + str(model.coefficients))
    print("Intercept: " + str(model.intercept))
    print("Scale: " + str(model.scale))
    model.transform(training).show(truncate=False)
    # $example off$

    spark.stop()
예제 #6
0
  bin/spark-submit examples/src/main/python/ml/aft_survival_regression.py
"""

if __name__ == "__main__":
    # Session for the example; stopped again at the end of the script.
    builder = SparkSession.builder.appName("PythonAFTSurvivalRegressionExample")
    spark = builder.getOrCreate()

    # $example on$
    # Column order of every training tuple below.
    columns = ["label", "censor", "features"]
    training = spark.createDataFrame(
        [(1.218, 1.0, Vectors.dense(1.560, -0.605)),
         (2.949, 0.0, Vectors.dense(0.346, 2.158)),
         (3.627, 0.0, Vectors.dense(1.380, 0.231)),
         (0.273, 1.0, Vectors.dense(0.520, 1.151)),
         (4.199, 0.0, Vectors.dense(0.795, -0.226))],
        columns)
    quantileProbabilities = [0.3, 0.6]
    aft = AFTSurvivalRegression(quantileProbabilities=quantileProbabilities,
                                quantilesCol="quantiles")

    model = aft.fit(training)

    # Print the fitted parameters, then the transformed training set.
    print("Coefficients: " + str(model.coefficients))
    print("Intercept: " + str(model.intercept))
    print("Scale: " + str(model.scale))
    transformed = model.transform(training)
    transformed.show(truncate=False)
    # $example off$

    spark.stop()
def train_model(training):
    """Fit an AFTSurvivalRegression estimator on *training* and return the model.

    The estimator uses quantile probabilities ``[0.3, 0.6]`` and writes
    quantiles to the ``"quantiles"`` column.
    """
    estimator = AFTSurvivalRegression(
        quantileProbabilities=[0.3, 0.6],
        quantilesCol="quantiles",
    )
    return estimator.fit(training)