def regression(
    experiment="one_month_forecast",
    include_pred_month=True,
    surrounding_pixels=None,
    explain=False,
    static="features",
    ignore_vars=None,
    predict_delta=False,
    spatial_mask=None,
    include_latlons=False,
):
    """Train and evaluate a ``LinearRegression`` model, optionally explaining it.

    The model is built on the folder returned by ``get_data_path()``. Every
    argument except ``explain`` is passed through unchanged, by keyword, to
    the ``LinearRegression`` constructor.

    Args:
        experiment: Forwarded to ``LinearRegression``.
        include_pred_month: Forwarded to ``LinearRegression``.
        surrounding_pixels: Forwarded to ``LinearRegression``.
        explain: When truthy, ``explain(save_shap_values=True)`` is called
            on the model after evaluation.
        static: Forwarded to ``LinearRegression``.
        ignore_vars: Forwarded to ``LinearRegression``.
        predict_delta: Forwarded to ``LinearRegression``.
        spatial_mask: Forwarded to ``LinearRegression``.
        include_latlons: Forwarded to ``LinearRegression``.

    Returns:
        None.
    """
    model_options = {
        "experiment": experiment,
        "include_pred_month": include_pred_month,
        "surrounding_pixels": surrounding_pixels,
        "static": static,
        "ignore_vars": ignore_vars,
        "predict_delta": predict_delta,
        "spatial_mask": spatial_mask,
        "include_latlons": include_latlons,
    }
    model = LinearRegression(get_data_path(), **model_options)

    model.train()
    model.evaluate(save_preds=True)

    if not explain:
        return

    # The explain step is mostly run to check that it works.
    model.explain(save_shap_values=True)
def regression(experiment='one_month_forecast',
               include_pred_month=True,
               surrounding_pixels=1):
    """Train, evaluate and explain a ``LinearRegression`` model.

    The data folder is resolved relative to the current working directory:
    ``data`` when the working directory is named ``ml_drought``, and
    ``../data`` otherwise.

    Args:
        experiment: Forwarded to ``LinearRegression``.
        include_pred_month: Forwarded to ``LinearRegression``.
        surrounding_pixels: Forwarded to ``LinearRegression``.

    Returns:
        None.
    """
    # If the working directory is already ml_drought we don't need ../data.
    # ``Path.cwd().name`` is the last component of the working directory,
    # so no manual string splitting of the path is required.
    if Path.cwd().name == 'ml_drought':
        data_path = Path('data')
    else:
        data_path = Path('../data')

    predictor = LinearRegression(data_path,
                                 experiment=experiment,
                                 include_pred_month=include_pred_month,
                                 surrounding_pixels=surrounding_pixels)
    predictor.train()
    predictor.evaluate(save_preds=True)

    # mostly to test it works
    predictor.explain(save_shap_values=True)
def regression(
    experiment="one_month_forecast",
    include_pred_month=True,
    surrounding_pixels=None,
    ignore_vars=None,
):
    """Train, evaluate and explain a ``LinearRegression`` model.

    The model is built on the folder returned by ``get_data_path()``, with
    ``static="embeddings"`` and a spatial mask read from
    ``interim/boundaries_preprocessed/kenya_asal_mask.nc`` under that folder.

    Args:
        experiment: Forwarded to ``LinearRegression``.
        include_pred_month: Forwarded to ``LinearRegression``.
        surrounding_pixels: Forwarded to ``LinearRegression``.
        ignore_vars: Forwarded to ``LinearRegression``.

    Returns:
        None.
    """
    data_root = get_data_path()
    mask_file = data_root / "interim/boundaries_preprocessed/kenya_asal_mask.nc"

    model = LinearRegression(
        data_root,
        experiment=experiment,
        include_pred_month=include_pred_month,
        surrounding_pixels=surrounding_pixels,
        ignore_vars=ignore_vars,
        static="embeddings",
        spatial_mask=mask_file,
    )

    model.train()
    model.evaluate(save_preds=True)

    # The explain step is mostly run to check that it works.
    model.explain(save_shap_values=True)
# Train both models on the same data folder.
data_path = Path("data")

l = LinearRegression(data_path)
l.train()

ln = LinearNetwork(layer_sizes=[100], data_folder=data_path)
ln.train(num_epochs=10)

# ------------------------------------------------------------------------------
# Try to explain the LinearRegression model
# ------------------------------------------------------------------------------
test_arrays_loader = DataLoader(
    data_path=data_path,
    batch_file_size=1,
    shuffle_data=False,
    mode="test",
)

# Only the first entry of the first batch yielded by the loader is explained.
first_batch = next(iter(test_arrays_loader))
key, val = list(first_batch.items())[0]
explanations = l.explain(val.x)

# Plot the SHAP explanations.
# 1. Mean spatial and temporal response: average over the first axis twice,
#    then draw one bar per input variable.
mean_expl = explanations.mean(axis=0).mean(axis=0)
x_vars = val.x_vars
df = pd.DataFrame({"variables": x_vars, "values": mean_expl})
sns.barplot(x="variables", y="values", data=df)
fig = plt.gcf()
plot_title = f"{key} {val.y_var} mean SHAP Values for Linear Regression"
plt.title(plot_title)
fig.savefig("scripts/mean_variable_importance_linear_regression.png", dpi=300)

# 2. Mean spatial response
values = explanations.mean(axis=0).T.flatten()