def get_model_performance_state(model,
                                yield_type='rainfed',
                                prediction_type='forward',
                                state_train=False):
    """Return by-state prediction scores with the ``rmse`` column rescaled.

    Predictions are loaded with ``load_prediction`` and scored with
    ``prediction_result_by_state``; the resulting ``rmse`` column is then
    multiplied by 0.0628 (labelled "convert to t/ha" in this file).

    Args:
        model: Model identifier forwarded to ``load_prediction``.
        yield_type: Forwarded to ``load_prediction`` and to
            ``prediction_result_by_state``.
        prediction_type: Forwarded to ``load_prediction``.
        state_train: Forwarded to ``load_prediction`` as its ``state``
            argument.

    Returns:
        The object produced by ``prediction_result_by_state`` with its
        ``rmse`` column rescaled.
    """
    predictions = load_prediction(model,
                                  yield_type=yield_type,
                                  prediction_type=prediction_type,
                                  state=state_train)
    scores = prediction_result_by_state(predictions, yield_type=yield_type)

    # Rescale RMSE to t/ha.
    rmse_original_units = scores.loc[:, 'rmse']
    scores.loc[:, 'rmse'] = rmse_original_units * 0.0628
    return scores
# Example #2
0
def get_national_prediction(model,
                            yield_type='rainfed',
                            prediction_type='forward',
                            area_weight=False,
                            direct_fn=False):
    """Aggregate loaded predictions into yearly predicted and actual yields.

    Rows containing any missing value are dropped. Before 2007 only rows
    from the seven states in ``state_2003`` are used; from 2007 onward rows
    from every state are used.

    Args:
        model: Model identifier forwarded to ``load_prediction``.
        yield_type: Key into ``yield_type_dict`` (and, when weighting, into
            ``area_type_dict``) that selects the yield and area columns.
        prediction_type: Forwarded to ``load_prediction``.
        area_weight: When true, each year's value is
            ``sum(yield * area) / sum(area)``; otherwise it is the plain
            mean over the selected rows.
        direct_fn: Forwarded to ``load_prediction``.

    Returns:
        Tuple ``(yield_predicted, yield_actual)`` of Series indexed by year.
    """
    df = load_prediction(model,
                         yield_type=yield_type,
                         prediction_type=prediction_type,
                         direct_fn=direct_fn)
    df = df.dropna()
    state_2003 = [
        'ILLINOIS', 'INDIANA', 'IOWA', 'MISSOURI', 'NEBRASKA', 'NORTH DAKOTA',
        'WISCONSIN'
    ]

    # Only the 7 states before 2007; all states from 2007 onward.
    con = (df.State.isin(state_2003) & (df.year < 2007)) | (df.year >= 2007)

    actual_col = yield_type_dict[yield_type]
    predicted_col = 'Predicted_' + actual_col
    selected = df[con]
    years = selected['year']

    # Reduce only the columns that are needed. Aggregating the whole frame
    # would also touch non-numeric columns such as State, which makes
    # ``mean()`` raise on recent pandas and wastes work for ``sum()``.
    if area_weight:
        area = selected[area_type_dict[yield_type]]
        total_area = area.groupby(years).sum()
        yield_predicted = (
            (selected[predicted_col] * area).groupby(years).sum() / total_area)
        yield_actual = (
            (selected[actual_col] * area).groupby(years).sum() / total_area)
    else:
        by_year = selected.groupby('year')
        yield_predicted = by_year[predicted_col].mean()
        yield_actual = by_year[actual_col].mean()
    return yield_predicted, yield_actual
# Example #3
0
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from func_crop_model import prediction_result_global
from plot_model_comparisons import load_prediction

# Restrict the evaluation to seven states because of CDL availability.
state_2003 = [
    'ILLINOIS',
    'INDIANA',
    'IOWA',
    'MISSOURI',
    'NEBRASKA',
    'NORTH DAKOTA',
    'WISCONSIN',
]

# Best climate model: leave-one-year-out predictions, scored on those states.
d0 = load_prediction('vpd_spline',
                     prediction_type='leave_one_year_out',
                     yield_type='rainfed')
d0_r2 = prediction_result_global(d0.loc[d0['State'].isin(state_2003)],
                                 yield_type='rainfed')
d0_r2.loc[:, 'rmse'] = 0.0628 * d0_r2.loc[:, 'rmse']  # convert to t/ha

# Best climate + EVI model, scored the same way.
d = load_prediction('vpd_spline_evi_poly',
                    prediction_type='leave_one_year_out',
                    yield_type='rainfed')
d1_r2 = prediction_result_global(d.loc[d['State'].isin(state_2003)],
                                 yield_type='rainfed')
d1_r2.loc[:, 'rmse'] = 0.0628 * d1_r2.loc[:, 'rmse']  # convert to t/ha

# Show both score tables, climate-only first.
print(d0_r2, d1_r2, sep='\n')
def make_plot(model='vpd_spline_evi_poly'):
    """Plot yearly model skill against precipitation anomaly and yield spread.

    Builds a 2x2 figure restricted to the seven states in ``state_2003``:

    * top row: R2 and RMSE versus the yearly mean of ``precip_gs_z``,
      with a fit of order 2 drawn by ``plot_fitting``;
    * bottom row: R2 and RMSE versus the yearly standard deviation of
      ``yield_rainfed`` (after the 0.0628 rescaling), with a fit of order 1.

    The figure is written to
    ``../figure/figure_model_performance_interannual_variability_causes_<model>.pdf``.

    Args:
        model: Model identifier forwarded to ``load_prediction`` and
            embedded in the output file name.
    """
    data_12 = load_yield_data()
    data_12.loc[:, 'yield_rainfed'] = data_12.loc[:, 'yield_rainfed'] * 0.0628

    # Constrain to seven states due to CDL availability.
    state_2003 = [
        'ILLINOIS', 'INDIANA', 'IOWA', 'MISSOURI', 'NEBRASKA', 'NORTH DAKOTA',
        'WISCONSIN'
    ]

    # Load best climate + evi
    d = load_prediction(model,
                        yield_type='rainfed',
                        prediction_type='leave_one_year_out')
    d1_r2 = prediction_result_global(d[d.State.isin(state_2003)],
                                     yield_type='rainfed')
    d1_r2.loc[:, 'rmse'] = d1_r2.loc[:, 'rmse'] * 0.0628  # convert to t/ha

    # Pick the column before reducing: aggregating the whole frame would
    # also hit the string State column, which raises on recent pandas.
    by_year = data_12[data_12.State.isin(state_2003)].groupby('year')
    d_pre = d1_r2.join(by_year['precip_gs_z'].mean().to_frame())
    d_yieldstd = d1_r2.join(by_year['yield_rainfed'].std().to_frame())

    fig, axes = plt.subplots(2, 2, figsize=(10, 7.5))

    # One entry per figure row: (data, x column, fit order, x-axis label).
    panel_rows = (
        (d_pre, 'precip_gs_z', 2, 'Precipitation standard anomaly'),
        (d_yieldstd, 'yield_rainfed', 1, 'Spatial yield variability (t/ha)'),
    )
    for row, (frame, x_col, fit_order, x_label) in enumerate(panel_rows):
        for col, metric in enumerate(('R2', 'rmse')):
            ax = axes[row, col]
            frame.plot.scatter(x=x_col, y=metric, ax=ax)
            plot_name(frame, x_col, metric, ax)
            plot_fitting(frame, x_col, metric, ax, order=fit_order)
            ax.set_xlabel(x_label, fontsize=12)
            # The metric is named by the column title, so drop the y label
            # that the scatter call sets.
            ax.set_ylabel('')

    axes[0, 0].set_title('$R^2$')
    axes[0, 1].set_title('RMSE (t/ha)')

    # Add panel labels a-d in row-major order.
    for ax, panel_label in zip(axes.flat, 'abcd'):
        ax.text(0.01,
                0.95,
                panel_label,
                fontsize=12,
                transform=ax.transAxes,
                fontweight='bold')

    plt.subplots_adjust(left=0.05,
                        right=0.95,
                        top=0.9,
                        bottom=0.1,
                        hspace=0.25)

    plt.savefig(
        '../figure/figure_model_performance_interannual_variability_causes_%s.pdf'
        % model)
    print('figure saved')
def make_plot(model='vpd_poly_evi_poly',
              yield_type='rainfed',
              prediction_type='leave_one_year_out'):
    """Draw per-state boxplots comparing the two training modes of a model.

    Scores are obtained twice through ``get_model_performance_state``, once
    with ``state_train=True`` and once with ``state_train=False``, combined
    by ``get_boxplot_data`` and shown as two stacked seaborn boxplots
    (R2 on top, RMSE below) with one group per state and ``Model`` as hue.
    The mean number of complete rows per state (years >= 2007, rounded up)
    is printed in parentheses under the lower panel.

    The figure is written to
    ``../figure/figure_global_state_model_comparison_<model>_<yield_type>_<prediction_type>_test.pdf``.

    Args:
        model: Model identifier; also decides the first year plotted
            (2007 when the name contains ``'evi'`` or ``'lst'``, else 2005).
        yield_type: Forwarded to the loading/scoring helpers.
        prediction_type: Forwarded to the loading/scoring helpers.
    """
    # Load state model performance
    d_r2 = get_model_performance_state(model,
                                       yield_type=yield_type,
                                       prediction_type=prediction_type,
                                       state_train=True)

    # Load global model performance
    dg_r2 = get_model_performance_state(model,
                                        yield_type=yield_type,
                                        prediction_type=prediction_type,
                                        state_train=False)
    d2 = load_prediction(model,
                         yield_type=yield_type,
                         prediction_type=prediction_type)

    # Mean number of rows per state across years. After dropna() every
    # column has the same non-null count, so the group size is used rather
    # than one hard-coded yield column. ``groupby(level=1).mean()`` replaces
    # ``mean(level=1)``, which recent pandas no longer accepts.
    county_n = (d2[d2.year >= 2007].dropna()
                .groupby(['year', 'State']).size()
                .groupby(level=1).mean()
                .apply(np.ceil))
    county_n.index = county_n.index.map(lambda x: x.title())

    year_begin = 2007 if ('evi' in model or 'lst' in model) else 2005

    # Second index level is sliced by year; first level is kept whole.
    year_window = (slice(None), slice(year_begin, 2016))
    d3_r2 = get_boxplot_data(dg_r2.loc[year_window, :],
                             d_r2.loc[year_window, :], 'R2')
    d3_rmse = get_boxplot_data(dg_r2.loc[year_window, :],
                               d_r2.loc[year_window, :], 'rmse')

    d3_r2.loc[:, 'State'] = d3_r2.State.apply(lambda x: x.title())
    d3_rmse.loc[:, 'State'] = d3_rmse.State.apply(lambda x: x.title())

    # Begin plot
    fig, [ax1, ax2] = plt.subplots(2, 1, figsize=(11, 5.5))

    meanlineprops = dict(linestyle='--', color='k')
    box_style = dict(x='State',
                     hue='Model',
                     fliersize=0,
                     meanline=True,
                     showmeans=True,
                     palette='Set3',
                     meanprops=meanlineprops)

    sns.boxplot(y='R2', data=d3_r2, ax=ax1, **box_style)
    sns.boxplot(y='rmse', data=d3_rmse, ax=ax2, **box_style)

    ax1.set_ylabel('R2', fontsize=12)
    ax2.set_ylabel('RMSE (t/ha)', fontsize=12)

    for ax, panel_label in ((ax1, 'a'), (ax2, 'b')):
        ax.text(0.01,
                0.90,
                panel_label,
                fontsize=12,
                transform=ax.transAxes,
                fontweight='bold')

    # Create fake lines for the median/mean legend, coloured like the box
    # outlines. Depending on the matplotlib/seaborn versions the boxes are
    # stored in ``ax.artists`` or in ``ax.patches``; use whichever is filled.
    boxes = ax1.artists or ax1.patches
    edge_color = boxes[0].get_edgecolor()
    mean_line = mlines.Line2D([], [], linestyle='--', color=edge_color)
    median_line = mlines.Line2D([], [], linestyle='-', color=edge_color)

    ax1.legend_.set_frame_on(False)

    ax2.legend([median_line, mean_line], ['median', 'mean'], loc='upper right')

    # Add county number under each state group.
    # NOTE(review): this assumes the boxplot's state order along x matches
    # the order of county_n (sorted by the groupby) -- confirm against
    # get_boxplot_data.
    for i, s in enumerate(county_n.values):
        ax2.text(i, -0.5, '(%d)' % s, fontsize=10, ha='center')

    ax2.legend_.set_frame_on(False)

    ax2.set_ylim([0, 2.5])  # 40 bu *0.0628
    ax1.set_xlabel('')
    ax2.set_xlabel('')

    plt.subplots_adjust(left=0.05, right=0.975, hspace=0.3, top=0.95)

    plt.savefig(
        '../figure/figure_global_state_model_comparison_%s_%s_%s_test.pdf' %
        (model, yield_type, prediction_type))
    print('figure saved')