def test_multi_target_random_forest():
    """Check SHAP additivity for a multi-output random forest regressor.

    Fits a small ``RandomForestRegressor`` on the Linnerud dataset shipped
    with shap, explains the held-out rows with ``shap.TreeExplainer`` and
    asserts that, for every (output, sample) pair, the SHAP values plus the
    matching expected value add up to the model prediction (``atol=1e-4``).
    """
    import shap
    import numpy as np
    from sklearn.model_selection import train_test_split
    from sklearn.ensemble import RandomForestRegressor

    features, targets = shap.datasets.linnerud()
    X_train, X_test, Y_train, _ = train_test_split(
        features, targets, test_size=0.2, random_state=0
    )

    forest = RandomForestRegressor(random_state=202, n_estimators=10, max_depth=10)
    forest.fit(X_train, Y_train)
    predictions = forest.predict(X_test)

    explainer = shap.TreeExplainer(forest)
    base_values = np.asarray(explainer.expected_value)
    assert len(base_values) == forest.n_outputs_, \
        "Length of expected_values doesn't match n_outputs_"

    n_samples, n_features = X_test.shape

    # Flatten the explanations to one row per (output, sample) pair.
    # NOTE(review): this assumes shap_values() yields the outputs along the
    # leading axis (one (n_samples, n_features) block per output) - confirm
    # against the installed shap version.
    stacked_shap = np.asarray(explainer.shap_values(X_test)).reshape(
        forest.n_outputs_ * n_samples, n_features
    )

    # Repeat each output's expected value once per sample so it lines up with
    # the output-major row order above, then append it as an extra column.
    base_column = np.repeat(base_values, n_samples)[:, np.newaxis]
    additive_terms = np.hstack((stacked_shap, base_column))

    # Column-major flattening lists all samples of the first output, then all
    # samples of the second output, and so on - the same order as the rows.
    assert np.allclose(
        additive_terms.sum(axis=1), predictions.ravel(order="F"), atol=1e-4
    )
# Train models: fit one random forest per (numTrees, maxDepth) combination,
# record every fit as its own MLflow run, and gather the metrics of all runs
# in resultsPdf (one row per run).
device = "Device001"
resultsPdf = pd.DataFrame()
resultColumns = ['Device', 'MaxDepth', 'NumTrees', 'MSE', 'r2']
runResultFrames = []

# Full cross product of the two hyper-parameter lists, numTrees varying slowest.
paramGrid = [(n, d) for n in numTreesList for d in maxDepthList]

for numTrees, maxDepth in paramGrid:
    with mlflow.start_run(run_name="Sensor Regression"):
        mlflow.log_param("maxDepth", maxDepth)
        mlflow.log_param("numTrees", numTrees)
        # NOTE(review): "Radom" looks like a typo for "Random"; left as-is
        # because the value is stored with the run and may be queried.
        mlflow.log_param("model", "Radom Forest Regressor - scikit")

        # Fit, train, and score the model
        model = RandomForestRegressor(max_depth=maxDepth, n_estimators=numTrees)
        model.fit(train_x, train_y)
        preds = model.predict(test_x)

        # Get metrics
        mse = mean_squared_error(test_y, preds)
        r2 = r2_score(test_y, preds)

        # Log metrics and model
        mlflow.log_metric('mse', mse)
        mlflow.log_metric('r2', r2)
        mlflow.sklearn.log_model(model, "model")

        # Build this run's one-row metrics table
        results = [[device, maxDepth, numTrees, mse, r2]]
        runResultsPdf = pd.DataFrame(results, columns=resultColumns)
        runResultFrames.append(runResultsPdf)

# DataFrame.append was removed in pandas 2.0, so the per-run frames are
# concatenated once here instead. Row index labels are left untouched, as
# DataFrame.append did by default; with an empty grid resultsPdf stays an
# empty DataFrame.
if runResultFrames:
    resultsPdf = pd.concat(runResultFrames)