Beispiel #1
0
def test_multi_target_random_forest():
    """Check SHAP additivity for a multi-output random forest regressor.

    Fits a ``RandomForestRegressor`` on a train split of the data returned by
    ``shap.datasets.linnerud()``, explains the held-out rows with
    ``shap.TreeExplainer`` and asserts that, for every held-out row and every
    model output, the feature attributions plus the expected value add up to
    the model prediction (``atol=1e-4``).
    """
    import shap
    import numpy as np
    from sklearn.model_selection import train_test_split
    from sklearn.ensemble import RandomForestRegressor

    X_train, X_test, Y_train, _ = train_test_split(*shap.datasets.linnerud(),
                                                   test_size=0.2,
                                                   random_state=0)
    est = RandomForestRegressor(random_state=202,
                                n_estimators=10,
                                max_depth=10)
    est.fit(X_train, Y_train)
    predicted = est.predict(X_test)

    explainer = shap.TreeExplainer(est)
    expected_values = np.asarray(explainer.expected_value)
    assert len(
        expected_values
    ) == est.n_outputs_, "Length of expected_values doesn't match n_outputs_"

    # Normalise the attributions to (n_samples, n_features, n_outputs).
    # Legacy SHAP returns a list with one (n_samples, n_features) array per
    # output; newer releases are expected to return a single array that is
    # already in the target layout (NOTE(review): confirm against the SHAP
    # version pinned by the project). Blindly reshaping the raw result would
    # silently scramble values whenever the layout differs from the legacy one.
    raw_shap_values = explainer.shap_values(X_test)
    if isinstance(raw_shap_values, list):
        shap_values = np.stack(raw_shap_values, axis=-1)
    else:
        shap_values = np.asarray(raw_shap_values)
    expected_shape = (X_test.shape[0], X_test.shape[1], est.n_outputs_)
    assert shap_values.shape == expected_shape, (
        "Unexpected shap_values layout: got %s, expected %s"
        % (shap_values.shape, expected_shape))

    # Additivity: summing over the feature axis and adding the per-output
    # expected value must reproduce the prediction for each (sample, output).
    reconstructed = shap_values.sum(axis=1) + expected_values
    assert np.allclose(reconstructed, predicted, atol=1e-4)
# Train models: fit one scikit-learn random forest per (numTrees, maxDepth)
# combination, log each fit as its own MLflow run, and collect the metrics of
# all runs in `resultsPdf`.
device = "Device001"

resultsPdf = pd.DataFrame()
# Per-run metric frames are gathered here and concatenated once after the
# sweep. `DataFrame.append` no longer exists in pandas 2.x, and growing a frame
# inside the loop copies all previous rows on every iteration.
runFrames = []
for numTrees in numTreesList:
    for maxDepth in maxDepthList:
        with mlflow.start_run(run_name="Sensor Regression"):

            mlflow.log_param("maxDepth", maxDepth)
            mlflow.log_param("numTrees", numTrees)
            # The logged value (including its spelling) is kept unchanged so
            # the recorded parameter stays identical across runs.
            mlflow.log_param("model", "Radom Forest Regressor - scikit")

            # Fit, train, and score the model
            model = RandomForestRegressor(max_depth=maxDepth, n_estimators=numTrees)
            model.fit(train_x, train_y)
            preds = model.predict(test_x)

            # Get Metrics
            mse = mean_squared_error(test_y, preds)
            r2 = r2_score(test_y, preds)

            # Log Metrics and Model
            mlflow.log_metric('mse', mse)
            mlflow.log_metric('r2', r2)
            mlflow.sklearn.log_model(model, "model")

            # Build Metrics Table
            results = [[device, maxDepth, numTrees, mse, r2]]
            runResultsPdf = pd.DataFrame(results, columns=['Device', 'MaxDepth', 'NumTrees', 'MSE', 'r2'])
            runFrames.append(runResultsPdf)

# pd.concat raises on an empty list, so an empty hyper-parameter grid leaves
# `resultsPdf` as the empty frame created above.
if runFrames:
    resultsPdf = pd.concat(runFrames)