def plot_ba(experiment, **kwargs):
    """Plot observed vs. predicted burned area (BA) for one experiment.

    Relies exclusively on cached data and a cached fitted model — each
    ``check_in_store`` / ``cache_check`` call fails if the corresponding
    entry is missing, so nothing is recomputed here.
    """
    # The train/test split must already be cached.
    get_experiment_split_data.check_in_store(experiment)
    train_X, test_X, train_y, test_y = get_experiment_split_data(experiment)

    # The endog/exog/mask triple must already be cached; only the mask
    # (index 2) is needed here.
    get_endog_exog_mask.check_in_store(experiment)
    mask = get_endog_exog_mask(experiment)[2]
    check_master_masks(mask)

    # Verify a fitted model exists in the cache (no fitting happens here).
    get_model(train_X, train_y, cache_check=True)

    test_predictions = threading_get_model_predict(
        X_train=train_X,
        y_train=train_y,
        predict_X=test_X,
    )

    plot_data = get_ba_plotting_data(test_predictions, test_y, mask)
    ba_plotting(
        *plot_data,
        figure_saver=map_figure_saver(sub_directory=experiment.name),
        **get_aux0_aux1_kwargs(test_y, mask),
        filename=f"{experiment.name}_ba_prediction",
    )
def correlation_plot(experiment, **kwargs):
    """Plot the feature correlation matrix for `experiment`.

    Operates on cached data only; the figure is saved into a
    sub-directory named after the experiment.
    """
    exp_figure_saver = figure_saver(sub_directory=experiment.name)

    # Operate on cached data only.
    get_data(experiment, cache_check=True)
    _, exog_data, _ = get_endog_exog_mask(experiment)

    def df_cols_to_str(df):
        # Shorten feature names in-place for readable axis labels.
        df.columns = [shorten_features(str(col)) for col in df.columns]
        return df

    with exp_figure_saver(f"{experiment.name}_corr_plot_full"):
        corr_plot(
            df_cols_to_str(exog_data[list(sort_variables(exog_data.columns))]),
            rotation=70,
            fig_kwargs={"figsize": (8.2, 6.3)},
        )
        plt.grid(False)
def fit_buffered_loo_sample(experiment,
                            radius,
                            max_rad,
                            seed,
                            cache_check=False,
                            **kwargs):
    """Run (or cache-check) a buffered leave-one-out fit for `experiment`.

    When `cache_check` is true, only verifies that the result is cached.
    Otherwise returns ``(data_info, hold_out_y, predicted_y)``, where
    `data_info` bundles the five sample-bookkeeping values produced by
    `buffered_leave_one_out`.
    """
    # Operate on cached data only.
    get_endog_exog_mask.check_in_store(experiment)
    endog_data, exog_data, master_mask = get_endog_exog_mask(experiment)

    bloo_kwargs = dict(
        exog_data=exog_data,
        endog_data=endog_data,
        master_mask=master_mask,
        radius=radius,
        max_rad=max_rad,
        extrapolation_check=False,
        seed=seed,
        verbose=False,
        dpi=300,
    )
    if cache_check:
        return buffered_leave_one_out.check_in_store(**bloo_kwargs)

    # The first five results describe the sample; the final two hold the
    # observed and predicted target values.
    *info_fields, hold_out_y, predicted_y = buffered_leave_one_out(
        **bloo_kwargs)
    data_info = tuple(info_fields)

    # Prevents memory buildup over repeated calls.
    gc.collect()

    return (data_info, hold_out_y, predicted_y)
def fit_random_binary_dilation(experiment,
                               structure,
                               test_frac,
                               seed,
                               cache_check=False,
                               **kwargs):
    """Fit and score a model on a random binary-dilation train/test split.

    With `cache_check` true, every stage (data, split, model, scores) is
    verified against the cache and the cached-scores check result is
    returned; intermediate stages are still loaded (from cache) because
    later checks need their outputs.
    """
    if cache_check:
        # Fail early if the raw data is not cached.
        get_data(experiment, cache_check=True)

    endog_data, exog_data, master_mask = get_endog_exog_mask(experiment)

    split_kwargs = dict(
        exog_data=exog_data,
        endog_data=endog_data,
        master_mask=master_mask,
        structure=structure,
        test_frac=test_frac,
        seed=seed,
        verbose=False,
    )
    if cache_check:
        random_binary_dilation_split.check_in_store(**split_kwargs)

    (desc_str, data_info, X_train, X_test, y_train,
     y_test) = random_binary_dilation_split(**split_kwargs)

    # Retrieve (or cache-check) the fitted model.
    model = optional_client_call(
        get_model,
        dict(X_train=X_train, y_train=y_train),
        cache_check=cache_check,
    )[0]

    score_args = (model, X_test, X_train, y_test, y_train)
    if cache_check:
        return get_model_scores.check_in_store(*score_args)
    return data_info, get_model_scores(*score_args)
def plot_obs_pred_comp(experiment, **kwargs):
    """Plot a map of observed minus predicted BA for `experiment`.

    Operates on cached data/models only.
    """
    # Operate on cached data/models only.
    get_experiment_split_data.check_in_store(experiment)
    # Locals renamed for consistency with the other plotting functions:
    # `y_test` (was `y_val`) is the test-set target, `predicted_test`
    # (was `u_pre`) the corresponding model prediction.
    X_train, X_test, y_train, y_test = get_experiment_split_data(experiment)
    get_model(X_train, y_train, cache_check=True)

    get_endog_exog_mask.check_in_store(experiment)
    master_mask = get_endog_exog_mask(experiment)[2]

    check_master_masks(master_mask)

    predicted_test = threading_get_model_predict(
        X_train=X_train,
        y_train=y_train,
        predict_X=X_test,
    )

    obs_pred_diff_cube = get_obs_pred_diff_cube(
        y_test, predicted_test, master_mask)

    with map_figure_saver(sub_directory=experiment.name)(
            f"{experiment.name}_obs_pred_comp", sub_directory="predictions"):
        disc_cube_plot(
            obs_pred_diff_cube,
            fig=plt.figure(figsize=(5.1, 2.3)),
            cmap="BrBG",
            cmap_midpoint=0,
            cmap_symmetric=False,
            bin_edges=[-0.01, -0.001, -1e-4, 0, 0.001, 0.01, 0.02],
            extend="both",
            cbar_format=get_sci_format(ndigits=0),
            cbar_pad=0.025,
            cbar_label="Ob. - Pr.",
            **get_aux0_aux1_kwargs(y_test, master_mask),
            loc=(0.83, 0.14),
            height=0.055,
            aspect=1,
            spacing=0.06 * 0.2,
        )
# Esempio n. 6 (scraping artifact — not part of the original source)
# 0
def multi_model_ale_plot(*args, verbose=False, **kwargs):
    """Plot 1D ALE curves for several experiments side by side.

    For each variable pair (FAPAR/LAI, then SIF/VOD) a 5x2 grid is
    created — one row per lag, one column per variable — and saved via
    `figure_saver`. Operates on cached data/models only.
    """
    # Experiments for which data will be plotted.
    experiments = [
        Experiment["ALL"],
        Experiment["TOP15"],
        Experiment["CURR"],
        Experiment["BEST15"],
        Experiment["15VEG_FAPAR"],
        Experiment["15VEG_LAI"],
        Experiment["15VEG_VOD"],
        Experiment["15VEG_SIF"],
        Experiment["CURRDD_FAPAR"],
        Experiment["CURRDD_LAI"],
        Experiment["CURRDD_VOD"],
        Experiment["CURRDD_SIF"],
    ]

    # Operate on cached data/models only.
    experiment_masks = []
    plotting_experiment_data = {}

    for experiment in tqdm(experiments, desc="Loading data"):
        get_data(experiment, cache_check=True)
        get_experiment_split_data.check_in_store(experiment)
        X_train, X_test, y_train, y_test = get_experiment_split_data(
            experiment)
        get_model(X_train, y_train, cache_check=True)

        experiment_masks.append(get_endog_exog_mask(experiment)[2])
        plotting_experiment_data[experiment] = dict(
            model=get_model(X_train, y_train),
            X_train=X_train,
        )

    # Ensure masks are aligned.
    check_master_masks(*experiment_masks)

    lags = (0, 1, 3, 6, 9)

    for comp_vars in [[variable.FAPAR, variable.LAI],
                      [variable.SIF, variable.VOD]]:
        fig, axes = plt.subplots(5, 2, sharex="col", figsize=(7.0, 5.8))

        # Create general legend labels (with 'X' instead of FAPAR, or LAI, etc...).
        mod_exp_plot_kwargs = deepcopy(experiment_plot_kwargs)
        for plot_kwargs in mod_exp_plot_kwargs.values():
            if plot_kwargs["label"].startswith("15VEG_"):
                plot_kwargs["label"] = "15VEG_X"
            elif plot_kwargs["label"].startswith("CURRDD_"):
                plot_kwargs["label"] = "CURRDD_X"

        # Axis scaling factors (the axis labels advertise the y factor).
        x_factor_exp = 0
        x_factor = 10**x_factor_exp
        # x_factor_str = rf"$10^{{{x_factor_exp}}}$"

        y_factor_exp = -4
        y_factor = 10**y_factor_exp
        y_factor_str = rf"$10^{{{y_factor_exp}}}$"

        # Left column: generalised ('X') legend labels shown once.
        multi_model_ale_1d(
            comp_vars[0],
            plotting_experiment_data,
            mod_exp_plot_kwargs,
            verbose=verbose,
            legend_bbox=(0.5, 1.01),
            fig=fig,
            axes=axes[:, 0:1],
            lags=lags,
            x_ndigits=2,
            x_factor=x_factor,
            x_rotation=0,
            y_ndigits=0,
            y_factor=y_factor,
        )
        # Right column: no legend (shared with the left one).
        multi_model_ale_1d(
            comp_vars[1],
            plotting_experiment_data,
            experiment_plot_kwargs,
            verbose=verbose,
            legend=False,
            fig=fig,
            axes=axes[:, 1:2],
            lags=lags,
            x_ndigits=2,
            x_factor=x_factor,
            x_rotation=0,
            y_ndigits=0,
            y_factor=y_factor,
        )

        for ax in axes[:, 1]:
            ax.set_ylabel("")
        for ax in axes[:, 0]:
            # Raw string: "(\dM)" is an invalid escape sequence otherwise
            # (SyntaxWarning on modern Python). Extract the lag, e.g. '3M'.
            lag_match = re.search(r"(\dM)", ax.get_xlabel())
            if lag_match:
                lag_m = f" {lag_match.group(1)}"
            else:
                lag_m = ""
            ax.set_ylabel(f"ALE{lag_m} ({y_factor_str} BA)")
        for ax in axes.flatten():
            ax.set_xlabel("")

        # Only the bottom row keeps variable/unit x-labels; units are only
        # correct while no x scaling is applied.
        for ax, var in zip(axes[-1], comp_vars):
            assert x_factor_exp == 0
            ax.set_xlabel(
                f"{shorten_features(str(var))} ({variable.units[var]})")

        # Panel letters: (a), (b), ...
        for ax, title in zip(axes.flatten(), ascii_lowercase):
            ax.text(0.5, 1.05, f"({title})", transform=ax.transAxes)

        margin = 0.4

        for ax in axes.ravel():
            ax.set_xlim(-margin, 20 + margin)

        fig.tight_layout(h_pad=0.4)
        fig.align_labels()

        figure_saver.save_figure(
            fig,
            f"{'__'.join(map(shorten_features, map(str, comp_vars)))}_ale_comp",
            sub_directory="ale_comp",
        )
# Esempio n. 7 (scraping artifact — not part of the original source)
# 0
# Module-level logger; project-wide logging is capped at WARNING.
logger = logging.getLogger(__name__)
enable_logging(level="WARNING")

# Suppress noisy warnings matched by message pattern.
# NOTE(review): the first three messages presumably come from geospatial
# cube handling (e.g. iris) — confirm against the emitting library.
warnings.filterwarnings("ignore", ".*Collapsing a non-contiguous coordinate.*")
warnings.filterwarnings("ignore", ".*DEFAULT_SPHERICAL_EARTH_RADIUS.*")
warnings.filterwarnings("ignore", ".*guessing contiguous bounds.*")

# NOTE(review): this message presumably originates from SHAP — confirm.
warnings.filterwarnings(
    "ignore", 'Setting feature_perturbation = "tree_path_dependent".*')

if __name__ == "__main__":
    # Analyse SHAP values for a single fixed experiment.
    experiment = Experiment["15VEG_FAPAR"]

    # Operate on cached model / data only.
    get_endog_exog_mask.check_in_store(experiment)
    endog_data, _, master_mask = get_endog_exog_mask(experiment)

    check_master_masks(master_mask)

    get_experiment_split_data.check_in_store(experiment)
    X_train, X_test, y_train, y_test = get_experiment_split_data(experiment)

    # Require a cached fitted model, then load it.
    get_model(X_train, y_train, cache_check=True)
    rf = get_model(X_train, y_train)

    # Load (cached) SHAP values for the test set.
    get_shap_values.check_in_store(rf=rf, X=X_test)
    shap_values = get_shap_values(rf=rf, X=X_test)

    # Analysis / plotting parameters.
    # NOTE(review): these are unused within this visible chunk — the
    # analysis consuming them presumably follows elsewhere in the file.
    diff_threshold = 0.5
    ptp_threshold_factor = 0.12  # relative to the mean
def prediction_comparisons():
    """Compare ALL and CURR predictions.

    Prints per-experiment summary statistics, then maps the relative
    difference in prediction-error magnitude between the two experiments
    and saves the resulting figure.
    """
    experiments = [Experiment.ALL, Experiment.CURR]
    # Operate on cached data/models only.

    experiment_data = {}
    experiment_models = {}

    for experiment in experiments:
        get_data(experiment, cache_check=True)
        get_experiment_split_data.check_in_store(experiment)
        X_train, X_test, y_train, y_test = get_experiment_split_data(experiment)
        get_model(X_train, y_train, cache_check=True)

        experiment_data[experiment] = get_endog_exog_mask(experiment)
        experiment_models[experiment] = get_model(X_train, y_train)

    # Ensure masks are aligned.
    check_master_masks(*(data[2] for data in experiment_data.values()))

    # Masks are verified equal above, so any experiment's mask will do.
    master_mask = next(iter(experiment_data.values()))[2]

    # Record predictions and errors.
    experiment_predictions = {}
    experiment_errors = {}
    map_experiment_predictions = {}
    map_experiment_errors = {}

    for experiment in experiments:
        X_train, X_test, y_train, y_test = get_experiment_split_data(experiment)
        predicted_test = threading_get_model_predict(
            X_train=X_train,
            y_train=y_train,
            predict_X=X_test,
        )

        print("Experiment:", experiment.name)
        print("mean observed test:", np.mean(y_test.values))
        print("mean predicted test:", np.mean(predicted_test))
        print("lowest observed test:", np.min(y_test.values))
        print(
            "fraction of times this occurs:",
            np.sum(y_test.values == np.min(y_test.values)) / y_test.values.size,
        )
        print("lowest test prediction:", np.min(predicted_test))

        experiment_predictions[experiment] = predicted_test
        experiment_errors[experiment] = y_test.values - predicted_test

        map_experiment_predictions[experiment] = get_mm_data(
            experiment_predictions[experiment], master_mask, kind="val"
        )
        map_experiment_errors[experiment] = get_mm_data(
            experiment_errors[experiment], master_mask, kind="val"
        )

    # Positive values: the second experiment (CURR) has larger error
    # magnitude than the first (ALL).
    error_mag_diff = np.abs(map_experiment_errors[experiments[1]]) - np.abs(
        map_experiment_errors[experiments[0]]
    )

    # `y_test` still holds the final loop iteration's test targets; the
    # previous explicit re-fetch of the same cached split was redundant.
    rel_error_mag_diff = np.mean(error_mag_diff, axis=0) / np.mean(
        get_mm_data(y_test.values, master_mask, kind="val"), axis=0
    )
    all_rel = get_unmasked(rel_error_mag_diff)

    print(f"% >0: {100 * np.sum(all_rel > 0) / all_rel.size:0.1f}")
    print(f"% <0: {100 * np.sum(all_rel < 0) / all_rel.size:0.1f}")

    fig, ax, cbar = disc_cube_plot(
        dummy_lat_lon_cube(rel_error_mag_diff),
        bin_edges=(-0.5, 0, 0.5),
        extend="both",
        cmap="PiYG",
        cmap_midpoint=0,
        cmap_symmetric=False,
        cbar_label=f"<|Err({experiments[1].name})| - |Err({experiments[0].name})|> / <Ob.>",
        cbar_shrink=0.3,
        cbar_aspect=15,
        cbar_extendfrac=0.1,
        cbar_pad=0.02,
        cbar_format=None,
        **get_aux0_aux1_kwargs(y_test, master_mask),
        loc=(0.79, 0.14),
        height=0.05,
        aspect=1.25,
        spacing=0.06 * 0.2,
    )
    cbar.ax.yaxis.label.set_size(7)

    map_figure_saver.save_figure(
        fig, f"rel_error_mag_diff_{'_'.join(map(attrgetter('name'), experiments))}"
    )