Example #1
0
def plot_alltogether(time_lag,
                     lon,
                     lat,
                     ts1,
                     ts2,
                     scale_ts=False,
                     save_fig=False,
                     *args):
    """
    Temporally match two time series and plot them in a single figure.

    Parameters
    ----------
    time_lag : object
        Only used (via ``str``) in the name of the saved figure.
    lon, lat : object
        Only used (via ``str``) in the name of the saved figure.
    ts1, ts2 : time series objects
        Passed to ``temp_match.matching`` as first and second argument.
    scale_ts : bool, optional
        If True, the matched data are passed through
        ``scaling.scale(..., method="mean_std")`` before plotting.
    save_fig : bool, optional
        If True, the figure is saved as PNG and cleared; otherwise it is
        shown with ``plt.show()``.
    *args
        Further positional arguments forwarded to ``temp_match.matching``.

    Returns
    -------
    None
        If the matched data set is empty a message is printed and nothing
        is plotted.
    """
    matched_data = temp_match.matching(ts1, ts2, *args)
    if len(matched_data) == 0:
        # Call form with a single argument prints the same text under
        # Python 2 and Python 3 (the print statement is Python 2 only).
        print("Empty dataset.")
        return
    if scale_ts:
        matched_data = scaling.scale(matched_data, method="mean_std")

    matched_data.plot(figsize=(15, 5))
    plt.title('SWI and Vegetation indices comparison (rescaled)')
    if save_fig:
        # NOTE(review): the output directory is a hard-coded, user-specific
        # Windows path.
        plt.savefig("C:\\Users\\i.pfeil\\Desktop\\TS_plots\\lon_" + str(lon) +
                    "_lat_" + str(lat) + '_' + str(time_lag) + ".png",
                    bbox_inches='tight')
        plt.clf()
    else:
        plt.show()
Example #2
0
def calc_rho(ascat_ssm, FP_df, hoal_df):
    """
    Match, scale and plot ASCAT, Parrot (FP) and HOAL soil moisture.

    Two figures are shown: the temporally matched data, and the scaled data
    without the columns listed in ``exclude``.

    Parameters
    ----------
    ascat_ssm : column-indexable object (presumably a pandas.DataFrame)
        Must provide the column 'ssm_ascat'. NOTE: this column is multiplied
        by 0.54 in place, so the caller's object is modified.
    FP_df : column-indexable object
        Must provide the column 'Parrot_vwc'.
    hoal_df : column-indexable object
        Must provide the column 'HOAL_sm0.05'.

    Returns
    -------
    None
        The Spearman results are computed but neither returned nor shown.
    """
    # multiply ASCAT with porosity (0.54) to get same units
    ascat_ssm['ssm_ascat'] = ascat_ssm['ssm_ascat'] * 0.54

    matched_data = matching(ascat_ssm, FP_df['Parrot_vwc'],
                            hoal_df['HOAL_sm0.05'])
    matched_data.plot()
    plt.title('Matched data: ASCAT, FP, HOAL')
    plt.show()

    # scale() is called with its default method
    data_together = scale(matched_data)

    # The last three rows are left out of both correlations.
    # NOTE(review): the results are currently unused; they were once part of
    # the plot title.
    ascat_rho = metrics.spearmanr(data_together['Parrot_vwc'].iloc[:-3],
                                  data_together['ssm_ascat'].iloc[:-3])

    hoal_rho_sm = metrics.spearmanr(data_together['Parrot_vwc'].iloc[:-3],
                                    data_together['HOAL_sm0.05'].iloc[:-3])

    exclude = [
        'HOAL_ts0.05', 'air_temperature_celsius', 'par_umole_m2s', 'merge_key'
    ]
    # .loc replaces the removed .ix indexer; the selection is purely
    # label-based, so the result is the same.
    plot_columns = data_together.columns.difference(exclude)
    data_together.loc[:, plot_columns].plot()
    plt.title(
        'Satellite and in-situ soil moisture, HOAL Petzenkirchen, station 22',
        fontsize=24)
    plt.ylabel('Volumetric Water Content [%]', fontsize=20)
    plt.tick_params(axis='both', which='major', labelsize=18)
    plt.ylim([0, 60])
    plt.show()
Example #3
0
def calc_rho(ascat_ssm, FP_df, hoal_df):
    """
    Plot matched and scaled soil moisture of ASCAT, Parrot (FP) and HOAL.

    The first figure shows the temporally matched input data, the second
    one the scaled data without the columns named in ``exclude``.

    Parameters
    ----------
    ascat_ssm : column-indexable object (presumably a pandas.DataFrame)
        Provides 'ssm_ascat'. NOTE: the column is overwritten with its
        values times 0.54, i.e. the caller's object is modified.
    FP_df : column-indexable object
        Provides 'Parrot_vwc'.
    hoal_df : column-indexable object
        Provides 'HOAL_sm0.05'.

    Returns
    -------
    None
        The Spearman results are calculated but not returned or displayed.
    """
    # multiply ASCAT with porosity (0.54) to get same units
    ascat_ssm['ssm_ascat'] = ascat_ssm['ssm_ascat'] * 0.54

    matched_data = matching(ascat_ssm, FP_df['Parrot_vwc'],
                            hoal_df['HOAL_sm0.05'])
    matched_data.plot()
    plt.title('Matched data: ASCAT, FP, HOAL')
    plt.show()

    # default scaling method of scale() is used
    data_together = scale(matched_data)

    # Correlations skip the last three rows.
    # NOTE(review): both results are unused at the moment.
    parrot = data_together['Parrot_vwc'].iloc[:-3]
    ascat_rho = metrics.spearmanr(parrot,
                                  data_together['ssm_ascat'].iloc[:-3])
    hoal_rho_sm = metrics.spearmanr(parrot,
                                    data_together['HOAL_sm0.05'].iloc[:-3])

    exclude = ['HOAL_ts0.05', 'air_temperature_celsius', 'par_umole_m2s',
               'merge_key']
    # label-based column selection; .loc replaces the removed .ix indexer
    data_together.loc[:, data_together.columns.difference(exclude)].plot()
    plt.title('Satellite and in-situ soil moisture, HOAL Petzenkirchen, station 22',
              fontsize=24)
    plt.ylabel('Volumetric Water Content [%]', fontsize=20)
    plt.tick_params(axis='both', which='major', labelsize=18)
    plt.ylim([0, 60])
    plt.show()
def test_matching_series():
    """
    Check ``tmatching.matching`` with pd.Series as input.

    The reference series is daily at 00:00 with a NaN on its fourth entry;
    the series to match is sampled at 09:00 on the same days. The result is
    expected to hold four rows with matched values 0, 1, 2 and 4.
    """
    reference_values = np.array([0.0, 1.0, 2.0, np.nan, 4.0])
    reference_index = pd.date_range(
        datetime(2007, 1, 1, 0), "2007-01-05", freq="D"
    )
    ref_ser = pd.Series(reference_values, index=reference_index)

    # 09:00 on each of the first five days of January 2007
    other_index = [datetime(2007, 1, day, 9) for day in range(1, 6)]
    match_ser = pd.Series(np.arange(5), index=other_index, name="matched_data")

    matched = tmatching.matching(ref_ser, match_ser)

    expected = np.array([0, 1, 2, 4])
    nptest.assert_allclose(expected, matched.matched_data)
    assert len(matched) == 4
def test_matching():
    """
    Check ``tmatching.matching`` with pd.DataFrame as input.

    The reference frame is daily at 00:00 with a NaN on its fourth entry;
    the frame to match is sampled at 09:00 on the same days. The result is
    expected to hold four rows with matched values 0, 1, 2 and 4.
    """
    reference_values = np.array([0.0, 1.0, 2.0, np.nan, 4.0])
    reference_index = pd.date_range(
        datetime(2007, 1, 1, 0), "2007-01-05", freq="D"
    )
    ref_df = pd.DataFrame({"data": reference_values}, index=reference_index)

    # 09:00 on each of the first five days of January 2007
    other_index = [datetime(2007, 1, day, 9) for day in range(1, 6)]
    match_df = pd.DataFrame({"matched_data": np.arange(5)}, index=other_index)

    matched = tmatching.matching(ref_df, match_df)

    expected = np.array([0, 1, 2, 4])
    nptest.assert_allclose(expected, matched.matched_data)
    assert len(matched) == 4
Example #6
0
def calc_rho(ascat_ssm, FP_df, hoal_df, hoal_raw):
    """
    Correlate and plot ASCAT, Parrot (FP) and HOAL data for several match orders.

    Spearman results are printed and three figures are shown, one per
    matched data set.

    Parameters
    ----------
    ascat_ssm : column-indexable object (presumably a pandas.DataFrame)
        Provides 'ssm_ascat'. NOTE: this column is multiplied by 0.54 in
        place, so the caller's object is modified.
    FP_df, hoal_df, hoal_raw : time series objects
        Passed to ``matching``. The matched result must contain the columns
        'Parrot_vwc', 'HOAL_sm0.05', 'air_temperature_celsius',
        'HOAL_ts0.05' and 'HOAL_raw_sm1'.

    Returns
    -------
    None
    """
    # multiply ASCAT with porosity (0.54) to get same units
    ascat_ssm['ssm_ascat'] = ascat_ssm['ssm_ascat'] * 0.54

    # in which order to match
    # (assumes the first argument of matching() is the reference, as in
    # pytesmo's temporal matching - TODO confirm)
    data_together1 = matching(FP_df, ascat_ssm, hoal_df, hoal_raw)
    data_together = matching(ascat_ssm, FP_df, hoal_df, hoal_raw)
    data_together2 = matching(FP_df, hoal_df)
    # The labels used to be swapped: data_together is the ASCAT-referenced
    # match, data_together1 the FP-referenced one.
    print('ref: ASCAT\n{}'.format(data_together))
    print('ref: FP\n{}'.format(data_together1))

    # all correlations leave out the last three rows
    ascat_rho = metrics.spearmanr(data_together['Parrot_vwc'].iloc[:-3],
                                  data_together['ssm_ascat'].iloc[:-3])

    hoal_rho_sm = metrics.spearmanr(data_together['Parrot_vwc'].iloc[:-3],
                                    data_together['HOAL_sm0.05'].iloc[:-3])
    hoal_rho_ts = metrics.spearmanr(
        data_together['air_temperature_' + 'celsius'].iloc[:-3],
        data_together['HOAL_ts0.05'].iloc[:-3])
    hoal_raw_rho_sm = metrics.spearmanr(
        data_together['Parrot_vwc'].iloc[:-3],
        data_together['HOAL_raw_sm1'].iloc[:-3])

    # single-argument print calls behave the same under Python 2 and 3
    print(ascat_rho)
    print(hoal_rho_sm)
    print(hoal_rho_ts)
    print(hoal_raw_rho_sm)

    exclude = [
        'HOAL_ts0.05', 'air_temperature_celsius', 'par_umole_m2s', 'merge_key'
    ]
    # .loc replaces the removed .ix indexer (label-based column selection)
    data_together.loc[:, data_together.columns.difference(exclude)].plot()
    plt.title(
        'Satellite and in-situ soil moisture, HOAL Petzenkirchen, station 22' +
        '\n rho_ASCAT_Parrot: ' + str(np.round(ascat_rho[0], 3)) +
        ', rho_HOAL_Parrot: ' + str(np.round(hoal_rho_sm[0], 3)) +
        ', rho_HOAL_raw_Parrot: ' + str(np.round(hoal_raw_rho_sm[0], 3)))
    plt.ylabel('Volumetric Water Content [%]')
    plt.show()

    data_together1.loc[:, data_together1.columns.difference(exclude)].plot()
    plt.show()
    data_together2.loc[:, data_together2.columns.difference(exclude)].plot()
    plt.show()
Example #7
0
def calc_rho(ascat_ssm, FP_df, hoal_df, hoal_raw):
    """
    Print Spearman results and plot ASCAT, Parrot (FP) and HOAL matches.

    The data are matched in three different ways; the correlations are
    computed on the ASCAT-first match and one figure per match is shown.

    Parameters
    ----------
    ascat_ssm : column-indexable object (presumably a pandas.DataFrame)
        Provides 'ssm_ascat'. NOTE: the column is overwritten with its
        values times 0.54, i.e. the caller's object is modified.
    FP_df, hoal_df, hoal_raw : time series objects
        Passed to ``matching``. The matched result must contain the columns
        'Parrot_vwc', 'HOAL_sm0.05', 'air_temperature_celsius',
        'HOAL_ts0.05' and 'HOAL_raw_sm1'.

    Returns
    -------
    None
    """
    # multiply ASCAT with porosity (0.54) to get same units
    ascat_ssm['ssm_ascat'] = ascat_ssm['ssm_ascat'] * 0.54

    # in which order to match
    # (assumes the first argument of matching() is the reference, as in
    # pytesmo's temporal matching - TODO confirm)
    data_together1 = matching(FP_df, ascat_ssm, hoal_df, hoal_raw)
    data_together = matching(ascat_ssm, FP_df, hoal_df, hoal_raw)
    data_together2 = matching(FP_df, hoal_df)
    # Labels corrected: data_together uses ASCAT as first argument,
    # data_together1 uses FP (they were printed the other way round).
    print('ref: ASCAT\n{}'.format(data_together))
    print('ref: FP\n{}'.format(data_together1))

    # every correlation leaves out the last three rows
    parrot = data_together['Parrot_vwc'].iloc[:-3]
    ascat_rho = metrics.spearmanr(parrot,
                                  data_together['ssm_ascat'].iloc[:-3])
    hoal_rho_sm = metrics.spearmanr(parrot,
                                    data_together['HOAL_sm0.05'].iloc[:-3])
    hoal_rho_ts = metrics.spearmanr(
        data_together['air_temperature_' + 'celsius'].iloc[:-3],
        data_together['HOAL_ts0.05'].iloc[:-3])
    hoal_raw_rho_sm = metrics.spearmanr(
        parrot, data_together['HOAL_raw_sm1'].iloc[:-3])

    # single-argument print calls behave the same under Python 2 and 3
    for result in (ascat_rho, hoal_rho_sm, hoal_rho_ts, hoal_raw_rho_sm):
        print(result)

    exclude = ['HOAL_ts0.05', 'air_temperature_celsius', 'par_umole_m2s',
               'merge_key']
    # .loc replaces the removed .ix indexer (label-based column selection)
    data_together.loc[:, data_together.columns.difference(exclude)].plot()
    plt.title('Satellite and in-situ soil moisture, HOAL Petzenkirchen, station 22' +
              '\n rho_ASCAT_Parrot: ' + str(np.round(ascat_rho[0], 3)) +
              ', rho_HOAL_Parrot: ' + str(np.round(hoal_rho_sm[0], 3)) +
              ', rho_HOAL_raw_Parrot: ' + str(np.round(hoal_raw_rho_sm[0], 3)))
    plt.ylabel('Volumetric Water Content [%]')
    plt.show()

    data_together1.loc[:, data_together1.columns.difference(exclude)].plot()
    plt.show()
    data_together2.loc[:, data_together2.columns.difference(exclude)].plot()
    plt.show()
Example #8
0
def rescale_df(ascat_ssm, FP_df):
    """
    Show ASCAT and Parrot soil moisture raw, matched and mean/std scaled.

    Three figures are shown one after another. The column 'ssm_ascat' of
    ``ascat_ssm`` is multiplied by 0.54 in place before anything is plotted,
    so the caller's object is modified. ``FP_df`` must provide the column
    'Parrot_vwc'. Nothing is returned.
    """

    def _show(frame, title=None, ylabel=None):
        # plot one frame, optionally decorate it, then display the figure
        frame.plot()
        if title is not None:
            plt.title(title)
        if ylabel is not None:
            plt.ylabel(ylabel)
        plt.show()

    factor = 0.54
    ascat_ssm['ssm_ascat'] = factor * ascat_ssm['ssm_ascat']
    _show(ascat_ssm)

    matched = matching(ascat_ssm, FP_df['Parrot_vwc'])
    _show(matched)

    scaled = scale(matched, method="mean_std")
    _show(scaled,
          title='Satellite and in-situ soil moisture, HOAL Petzenkirchen',
          ylabel='Volumetric Water Content [%]')
Example #9
0
def rescale_df(ascat_ssm, FP_df):
    """
    Plot ASCAT/Parrot soil moisture in three stages: raw, matched, scaled.

    'ssm_ascat' in ``ascat_ssm`` is overwritten with its values times 0.54
    (the caller's object is modified), the result is matched with
    ``FP_df['Parrot_vwc']`` and scaled with method "mean_std". Each stage is
    shown in its own figure. Nothing is returned.
    """

    def _display(data, heading=None, y_text=None):
        # draw the data, add optional labels and show the figure
        data.plot()
        if heading is not None:
            plt.title(heading)
        if y_text is not None:
            plt.ylabel(y_text)
        plt.show()

    conversion = 0.54
    ascat_ssm['ssm_ascat'] = conversion * ascat_ssm['ssm_ascat']
    _display(ascat_ssm)

    matched_frame = matching(ascat_ssm, FP_df['Parrot_vwc'])
    _display(matched_frame)

    scaled_frame = scale(matched_frame, method="mean_std")
    _display(scaled_frame,
             heading='Satellite and in-situ soil moisture, HOAL Petzenkirchen',
             y_text='Volumetric Water Content [%]')
Example #10
0
def calc_corr_IDSI_SWI(lat_min, lat_max, lon_min, lon_max,
                       swi_stack_path=None):
    """
    Print Spearman's rho between negated drought values and SWI anomalies.

    For every T value in (1, 5, 10, 15, 20, 40, 60, 100) the SWI time series
    of the area is read, converted to anomalies, cut at '20150626', divided
    by 100 and temporally matched with the negated drought values. The T
    value and the resulting rho and p-value are printed.

    Parameters
    ----------
    lat_min, lat_max, lon_min, lon_max : object
        Bounding box, passed on to ``create_drought_dist`` and
        ``read_ts_area``.
    swi_stack_path : str, optional
        Path of the SWI stack file. Defaults to the path that used to be
        hard-coded in this function.

    Returns
    -------
    None
    """
    if swi_stack_path is None:
        swi_stack_path = ('C:\\Users\\s.hochstoger\\Desktop\\0_IWMI_DATASETS'
                          '\\Dataset_stacks\\SWI_stack.nc')

    df = create_drought_dist(lat_min, lat_max, lon_min, lon_max)
    # flip the sign of the drought values before correlating
    df.drought = df.drought * (-1)
    drought = pd.DataFrame(df.drought)
    tt = [1, 5, 10, 15, 20, 40, 60, 100]
    for t in tt:
        # print calls (instead of Python 2 print statements) produce the
        # same output under Python 2 and 3
        print(t)
        df_swi = read_ts_area(swi_stack_path, 'SWI_' + str(t).zfill(3),
                              lat_min, lat_max, lon_min, lon_max)
        anomaly_swi = anomaly(df_swi)

        df_anom = anomaly_swi.loc[:'20150626'] / 100
        match = temp_match.matching(drought, df_anom)
        s_rho, s_p = metrics.spearmanr(match.iloc[:, 0], match.iloc[:, 1])
        print('%s %s' % (s_rho, s_p))
Example #11
0
def calc_corr_IDSI_SWI(lat_min, lat_max, lon_min, lon_max,
                       swi_stack_path=None):
    """
    Correlate negated drought values with SWI anomalies and print the result.

    The SWI variables 'SWI_001' ... 'SWI_100' (T values 1, 5, 10, 15, 20,
    40, 60, 100) are read for the given area one after another. Each is
    turned into anomalies, limited to data up to '20150626', divided by 100
    and temporally matched with the negated drought values. The T value and
    Spearman's rho and p-value are printed for each.

    Parameters
    ----------
    lat_min, lat_max, lon_min, lon_max : object
        Bounding box, passed on to ``create_drought_dist`` and
        ``read_ts_area``.
    swi_stack_path : str, optional
        Path of the SWI stack file. Defaults to the path that used to be
        hard-coded in this function.

    Returns
    -------
    None
    """
    if swi_stack_path is None:
        swi_stack_path = ('C:\\Users\\s.hochstoger\\Desktop\\0_IWMI_DATASETS'
                          '\\Dataset_stacks\\SWI_stack.nc')

    df = create_drought_dist(lat_min, lat_max, lon_min, lon_max)
    # negate the drought values before correlating
    df.drought = df.drought * (-1)
    drought = pd.DataFrame(df.drought)
    for t in (1, 5, 10, 15, 20, 40, 60, 100):
        # print calls work under Python 2 and 3 and give identical output
        print(t)
        swi_name = 'SWI_' + str(t).zfill(3)
        df_swi = read_ts_area(swi_stack_path, swi_name,
                              lat_min, lat_max, lon_min, lon_max)
        anomaly_swi = anomaly(df_swi)

        df_anom = anomaly_swi.loc[:'20150626'] / 100
        match = temp_match.matching(drought, df_anom)
        s_rho, s_p = metrics.spearmanr(match.iloc[:, 0], match.iloc[:, 1])
        print('%s %s' % (s_rho, s_p))
Example #12
0
def rescale_df(ascat_ssm, FP_df, hoal_df):
    """
    Show ASCAT, Parrot (FP) and HOAL soil moisture matched and scaled.

    'ssm_ascat' in ``ascat_ssm`` is multiplied by 0.54 in place (the
    caller's object is modified). The data are then matched with
    ``FP_df['Parrot_vwc']`` and ``hoal_df`` and scaled with the default
    method of ``scale``. Two figures are shown; nothing is returned.
    """

    def _show(frame, title, ylabel=None, ylim=None):
        # plot one frame, decorate it and display the figure
        frame.plot()
        plt.title(title)
        if ylabel is not None:
            plt.ylabel(ylabel)
        if ylim is not None:
            plt.ylim(ylim)
        plt.show()

    factor = 0.54
    ascat_ssm['ssm_ascat'] = factor * ascat_ssm['ssm_ascat']

    matched = matching(ascat_ssm, FP_df['Parrot_vwc'], hoal_df)
    _show(matched, 'Matched data: ASCAT, FP, HOAL')

    scaled = scale(matched)
    _show(scaled,
          'Satellite and in-situ soil moisture, HOAL Petzenkirchen',
          ylabel='Volumetric Water Content [%]',
          ylim=[0, 60])
Example #13
0
def rescale_df(ascat_ssm, FP_df, hoal_df):
    """
    Plot matched and scaled ASCAT, Parrot (FP) and HOAL soil moisture.

    The column 'ssm_ascat' of ``ascat_ssm`` is overwritten with its values
    times 0.54, so the caller's object is modified. The matched data
    (``ascat_ssm``, ``FP_df['Parrot_vwc']``, ``hoal_df``) and their scaled
    version (default method of ``scale``) are each shown in a figure.
    Nothing is returned.
    """

    def _display(data, heading, y_text=None, y_range=None):
        # draw the data, add labels/limits if given and show the figure
        data.plot()
        plt.title(heading)
        if y_text is not None:
            plt.ylabel(y_text)
        if y_range is not None:
            plt.ylim(y_range)
        plt.show()

    conversion = 0.54
    ascat_ssm['ssm_ascat'] = conversion * ascat_ssm['ssm_ascat']

    matched_frame = matching(ascat_ssm, FP_df['Parrot_vwc'], hoal_df)
    _display(matched_frame, 'Matched data: ASCAT, FP, HOAL')

    scaled_frame = scale(matched_frame)
    _display(scaled_frame,
             'Satellite and in-situ soil moisture, HOAL Petzenkirchen',
             y_text='Volumetric Water Content [%]',
             y_range=[0, 60])
Example #14
0
def test_matching():
    """
    Test ``tmatching.matching`` with DataFrames.

    Reference: five daily values at 00:00, the fourth being NaN.
    Other data: values 0..4 at 09:00 of the same days.
    Expected: four matched rows carrying the values 0, 1, 2 and 4.
    """
    ref_values = np.array([0.0, 1.0, 2.0, np.nan, 4.0])
    ref_index = pd.date_range(datetime(2007, 1, 1, 0), "2007-01-05", freq="D")
    ref_df = pd.DataFrame({"data": ref_values}, index=ref_index)

    # 09:00 on January 1st to 5th, 2007
    nine_oclock = [datetime(2007, 1, day, 9) for day in range(1, 6)]
    match_df = pd.DataFrame({"matched_data": np.arange(5)}, index=nine_oclock)

    matched = tmatching.matching(ref_df, match_df)

    expected_values = np.array([0, 1, 2, 4])
    nptest.assert_allclose(expected_values, matched.matched_data)
    assert len(matched) == 4
Example #15
0
def test_matching_series():
    """
    Test ``tmatching.matching`` with pd.Series as input.

    Reference: five daily values at 00:00, the fourth being NaN.
    Other data: values 0..4 at 09:00 of the same days, named 'matched_data'.
    Expected: four matched rows carrying the values 0, 1, 2 and 4.
    """
    ref_values = np.array([0.0, 1.0, 2.0, np.nan, 4.0])
    ref_index = pd.date_range(datetime(2007, 1, 1, 0), "2007-01-05", freq="D")
    ref_ser = pd.Series(ref_values, index=ref_index)

    # 09:00 on January 1st to 5th, 2007
    nine_oclock = [datetime(2007, 1, day, 9) for day in range(1, 6)]
    match_ser = pd.Series(np.arange(5), index=nine_oclock, name='matched_data')

    matched = tmatching.matching(ref_ser, match_ser)

    expected_values = np.array([0, 1, 2, 4])
    nptest.assert_allclose(expected_values, matched.matched_data)
    assert len(matched) == 4
Example #16
0
def corr(paths,
         corr_df,
         start_date,
         end_date,
         lon=None,
         lat=None,
         vi_str='NDVI',
         time_lags=(0, 10, 20, 30, 40, 50, 60, 100),
         plot_time_lags=False):
    """ Calculate Spearman's Rho and p-value for SWI (all t-values) and
    specified VI (default NDVI).
    If plot_time_lags is True, a plot of VI (for different time lags) over
    SWI (all t-values) is created.

    Parameters:
    -----------
    paths : dict
        Paths to datasets; the keys 'SWI' and `vi_str` are used
    corr_df : pd.DataFrame
        DataFrame where correlation coeff.s are stored. Only rows whose
        index value equals a time lag are filled.
    start_date, end_date : datetime
        Start and end date
    lon, lat : optional
        Location passed on to read_ts, default: None
    vi_str : str, optional
        Vegetation index to use, default: NDVI
    time_lags : sequence of int, optional
        Time lags (days) by which the VI is shifted,
        default: (0, 10, 20, 30, 40, 50, 60, 100)
    plot_time_lags : bool, optional
        Plot (shifted) VI over SWIs, default: False. The plots are only
        created when 0 is one of the time lags.

    Returns:
    --------
    corr_df : pd.DataFrame
        DataFrame containing the correlation coeff.s; rho is NaN where the
        p-value is larger than 0.05
    """

    swi_path = paths['SWI']
    vi_path = paths[vi_str]

    # read SWI for different T-values and VI
    swi_list = [
        'SWI_001', 'SWI_010', 'SWI_020', 'SWI_040', 'SWI_060', 'SWI_100'
    ]
    swi_df = read_ts(swi_path,
                     lon=lon,
                     lat=lat,
                     params=swi_list,
                     start_date=start_date,
                     end_date=end_date)
    vi_raw = read_ts(vi_path,
                     lon=lon,
                     lat=lat,
                     params=vi_str,
                     start_date=start_date,
                     end_date=end_date)
    # Replace the value 255 by NaN. A single positional assignment is used
    # because the former chained form (vi[vi_str][rows] = ...) is not
    # guaranteed to write into the DataFrame itself.
    rows_255 = np.where(vi_raw == 255)[0]
    vi_raw.iloc[rows_255, vi_raw.columns.get_loc(vi_str)] = np.nan

    # rescale SWI before further processing using method from
    # Peng et al., 2014
    water = {}
    for swi_key in swi_list:
        swi_series = swi_df[swi_key]
        water[swi_key] = rescale_peng(swi_series,
                                      np.nanmin(swi_series),
                                      np.nanmax(swi_series))

    vi_resc = rescale_peng(vi_raw, np.nanmin(vi_raw), np.nanmax(vi_raw))

    # lags used for the overview plots of the shifted VI
    plot_lags = (10, 20, 30, 40, 50, 60, 100)

    for time_lag in time_lags:
        # insert time lag
        if time_lag > 0:
            vi_lagged = vi_resc.copy()
            vi = pd.DataFrame(vi_lagged.values,
                              columns=[vi_str],
                              index=vi_lagged.index +
                              timedelta(days=time_lag))
        else:
            # As before, no shift is applied and the un-rescaled VI is
            # used. Selecting it explicitly avoids reusing the VI shifted
            # by a previous, positive time lag.
            vi = vi_raw

        # plot vi time lags over SWI
        if plot_time_lags and time_lag == 0:
            plot_alltogether(0, lon, lat, swi_df, vi.copy(), save_fig=True)
            for plot_lag in plot_lags:
                vi_shifted = pd.DataFrame(vi.values,
                                          columns=['vi%d' % plot_lag],
                                          index=vi.index +
                                          timedelta(days=plot_lag))
                plot_alltogether(plot_lag, lon, lat, swi_df, vi_shifted,
                                 save_fig=True)

        # calculate Spearman's Rho and p-value for VI and SWIs
        for ds_water in sorted(water):
            data_together = temp_match.matching(water[ds_water], vi)
            rho, p = metrics.spearmanr(data_together[ds_water],
                                       data_together[vi_str])
            # mask values with p-value > 0.05
            if p > 0.05:
                rho = np.nan

            rho_col = vi_str + '_' + ds_water + '_rho'
            p_col = vi_str + '_' + ds_water + '_p'
            if rho_col in corr_df.columns:
                # write directly into corr_df (no chained indexing)
                lag_rows = corr_df.index == time_lag
                corr_df.loc[lag_rows, rho_col] = rho
                corr_df.loc[lag_rows, p_col] = p
            else:
                # new columns; values are aligned on the index of corr_df
                corr_df[rho_col] = pd.Series(rho, index=[time_lag])
                corr_df[p_col] = pd.Series(p, index=[time_lag])

    return corr_df
Example #17
0
def compare_data(ismn_data, validation_data, scaling='linreg', anomaly=None):
    """
    Compare data from an ISMN station to the defined validation datasets.

    Parameters
    ----------
    ismn_data: pandas.Dataframe
        Data from the ISMN used as a reference; the column 'soil moisture'
        is used.
    validation_data: dict
        Dictionary of pandas.DataFrames, One for each dataset to
        compare against; the column 'cci_sm' is used.
        NOTE(review): if several datasets are given, only the one iterated
        last ends up in the result.
    scaling: string, optional
        Scaling method to use. 'noscale' uses the matched data as they are;
        'porosity' is offered in the settings but not implemented here.
    anomaly: string
        If set then the validation is done for anomalies. Supported values
        are 'climatology' and 'average'.

    Returns
    -------
    output_data : dict
        Keys 'validation_data', 'masking_data', 'statistics', 'settings'.
    status : int
        Always 1.

    Raises
    ------
    ValueError
        If `anomaly` has an unsupported value, or if no comparison data
        could be produced (empty `validation_data` or scaling 'porosity').
    """
    insitu_label = 'soil moisture'
    vdata_label = 'cci_sm'

    def _to_anomaly(frame, column):
        # Return a copy of frame whose column holds anomalies; rows with
        # NaN are dropped. The caller's frame is left untouched.
        series = frame[column]
        if anomaly == 'climatology':
            clim = anomaly_calc.calc_climatology(series)
            anom = anomaly_calc.calc_anomaly(series, climatology=clim)
        elif anomaly == 'average':
            anom = anomaly_calc.calc_anomaly(series)
        else:
            raise ValueError("unsupported anomaly type: %r" % (anomaly,))
        result = frame.copy()
        result[column] = anom.values
        return result.dropna()

    # The anomaly code used to read variables that are never defined in this
    # function (ascat_masked, ascat_label, ISMN_data) and therefore always
    # failed. It now works on the data that are actually passed in.
    # NOTE(review): intent inferred from the docstring - please confirm.
    if anomaly is not None:
        ismn_data = _to_anomaly(ismn_data, insitu_label)

    scaled_data = None
    scaled_label = None
    for vdata in validation_data.values():
        if anomaly is not None:
            vdata = _to_anomaly(vdata, vdata_label)

        matched_data = temp_match.matching(ismn_data, vdata, window=1)

        if scaling == 'noscale':
            scaled_data = matched_data[[insitu_label, vdata_label]]
            scaled_label = vdata_label
        elif scaling != 'porosity':
            scaled_data = scale.add_scaled(matched_data,
                                           label_in=vdata_label,
                                           label_scale=insitu_label,
                                           method=scaling)
            scaled_label = vdata_label + '_scaled_' + scaling
            scaled_data = scaled_data[[insitu_label, scaled_label]]

    if scaled_data is None:
        # previously this situation surfaced as a NameError further down
        raise ValueError("no data to compare: validation_data is empty or "
                         "scaling %r is not implemented" % (scaling,))

    labels, values = scaled_data.to_dygraph_format()

    ascat_insitu = {'labels': labels, 'data': values}

    x, y = scaled_data[insitu_label].values, scaled_data[scaled_label].values

    kendall, p_kendall = sc_stats.kendalltau(x.tolist(), y.tolist())
    spearman, p_spearman = sc_stats.spearmanr(x, y)
    pearson, p_pearson = sc_stats.pearsonr(x, y)
    bias = metrics.bias(y, x)
    mse, mse_corr, mse_bias, mse_var = metrics.mse(x, y)
    statistics = {
        'kendall': {
            'v': '%.2f' % kendall,
            'p': '%.4f' % p_kendall
        },
        'spearman': {
            'v': '%.2f' % spearman,
            'p': '%.4f' % p_spearman
        },
        'pearson': {
            'v': '%.2f' % pearson,
            'p': '%.4f' % p_pearson
        },
        'bias': '%.4f' % bias,
        # the rmsd values are the square roots of the mse components
        'rmsd': {
            'rmsd': '%.4f' % np.sqrt(mse),
            'rmsd_corr': '%.4f' % np.sqrt(mse_corr),
            'rmsd_bias': '%.4f' % np.sqrt(mse_bias),
            'rmsd_var': '%.4f' % np.sqrt(mse_var)
        },
        'mse': {
            'mse': '%.4f' % mse,
            'mse_corr': '%.4f' % mse_corr,
            'mse_bias': '%.4f' % mse_bias,
            'mse_var': '%.4f' % mse_var
        }
    }

    scaling_options = {
        'noscale': 'No scaling',
        'porosity': 'Scale using porosity',
        'linreg': 'Linear Regression',
        'mean_std': 'Mean - standard deviation',
        'min_max': 'Minimum,maximum',
        'lin_cdf_match': 'Piecewise <br> linear CDF matching',
        'cdf_match': 'CDF matching'
    }

    settings = {
        'scaling': scaling_options[scaling],
    }

    # no masking data are produced here; an empty placeholder is returned
    era_data = {'labels': [], 'data': []}
    output_data = {
        'validation_data': ascat_insitu,
        'masking_data': era_data,
        'statistics': statistics,
        'settings': settings
    }

    return output_data, 1
Example #18
0
def compare_ssm_index(index, warp_gpi, sm_dataset, start=None, end=None,
                      weekly=True, plot=False):
    """
    Rescale IWMI bare/crop time series to a soil moisture data set.

    The IWMI data of `index` are CDF-matched (``scaling.lin_cdf_match``) to
    the selected soil moisture time series, optionally after weekly
    averaging, and optionally plotted and saved as PNG.

    Parameters
    ----------
    index : object
        Index passed to ``ssm_iwmi.IWMI_ts_index``; also used in plot labels
        and in the file name of the saved figure.
    warp_gpi : object
        Grid point passed to the soil moisture reader.
    sm_dataset : str
        One of 'cci', 'ascat' or 'ers'.
    start, end : optional
        Only passed to the 'cci' reader.
        NOTE(review): the 'ers' reader is explicitly called with
        start=None, end=None - confirm whether that is intended.
    weekly : bool, optional
        If True, all series are resampled to weekly means before matching
        and Spearman correlations are calculated.
    plot : bool, optional
        If True, the series are plotted and the figure is saved.

    Returns
    -------
    iwmi_bare_plot, iwmi_crop_plot, sm_ts_plot
        The (rescaled) bare and crop series and the soil moisture series,
        weekly or original depending on `weekly`.

    Raises
    ------
    ValueError
        If `sm_dataset` is not one of the supported names.
    """
    df = ssm_iwmi.IWMI_read_csv()
    iwmi_bare, iwmi_crop = ssm_iwmi.IWMI_ts_index(df, index)
    if sm_dataset == 'cci':
        sm_ts = ssm_TUW.read_CCI(warp_gpi, start, end)
    elif sm_dataset == 'ascat':
        sm_ts = ssm_TUW.read_ASCAT_ssm(warp_gpi)
    elif sm_dataset == 'ers':
        sm_ts = ssm_TUW.read_ERS_ssm(warp_gpi, args=['sm'], start=None,
                                     end=None)
    else:
        # previously this surfaced later as an undefined-variable error
        raise ValueError("unknown sm_dataset: %r" % (sm_dataset,))

    # Correlations stay None unless they can be calculated; this replaces
    # the former "'name' in locals()" checks.
    corr_bare = None
    corr_crop = None
    corr_crop_bare = None

    if weekly:
        # .resample(...).mean() replaces the removed how='mean' keyword
        iwmi_bare_weekly = iwmi_bare.resample('W').mean().dropna()
        iwmi_crop_weekly = iwmi_crop.resample('W').mean().dropna()
        sm_ts_weekly = sm_ts.resample('W').mean().dropna()

        # correlation of bare and crop
        if len(iwmi_bare) != 0 and len(iwmi_crop) != 0:
            # rename the first bare column in place
            # (presumably to avoid a column name clash when matching)
            iwmi_bare_weekly.columns.values[0] = 1
            match_bare_crop = temp_match.matching(iwmi_crop_weekly,
                                                  iwmi_bare_weekly)
            corr_crop_bare = metrics.spearmanr(match_bare_crop.iloc[:, 0],
                                               match_bare_crop.iloc[:, 1])[0]
            print(corr_crop_bare)

        if len(iwmi_bare) != 0:
            match_bare = temp_match.matching(sm_ts_weekly, iwmi_bare_weekly)
            iwmi_bare_weekly_match = match_bare.iloc[:, 1]
            sm_ts_weekly_bare = match_bare.iloc[:, 0]
            iwmi_bare_resc = scaling.lin_cdf_match(
                iwmi_bare_weekly.iloc[:, 0], sm_ts_weekly)
            iwmi_bare_weekly = pd.DataFrame(iwmi_bare_resc,
                                            index=iwmi_bare_weekly.index)
            corr_bare = metrics.spearmanr(iwmi_bare_weekly_match,
                                          sm_ts_weekly_bare)[0]

        if len(iwmi_crop) != 0:
            match_crop = temp_match.matching(sm_ts_weekly, iwmi_crop_weekly)
            iwmi_crop_weekly_match = match_crop.iloc[:, 1]
            sm_ts_weekly_crop = match_crop.iloc[:, 0]
            iwmi_crop_resc = scaling.lin_cdf_match(
                iwmi_crop_weekly.iloc[:, 0], sm_ts_weekly)
            iwmi_crop_weekly = pd.DataFrame(iwmi_crop_resc,
                                            index=iwmi_crop_weekly.index)
            corr_crop = metrics.spearmanr(iwmi_crop_weekly_match,
                                          sm_ts_weekly_crop)[0]

        sm_ts_plot = sm_ts_weekly
        iwmi_bare_plot = iwmi_bare_weekly
        iwmi_crop_plot = iwmi_crop_weekly
    else:
        if len(iwmi_bare) == 0:
            iwmi_bare_resc = iwmi_bare
        else:
            iwmi_bare_resc = scaling.lin_cdf_match(iwmi_bare, sm_ts)

        if len(iwmi_crop) == 0:
            iwmi_crop_resc = iwmi_crop
        else:
            iwmi_crop_resc = scaling.lin_cdf_match(iwmi_crop, sm_ts)

        iwmi_bare = pd.DataFrame(iwmi_bare_resc,
                                 index=iwmi_bare.index)
        iwmi_crop = pd.DataFrame(iwmi_crop_resc,
                                 index=iwmi_crop.index)

        sm_ts_plot = sm_ts
        iwmi_bare_plot = iwmi_bare
        iwmi_crop_plot = iwmi_crop

    # The three return values are now assigned in both branches above, so
    # the function no longer fails on return when plot is False.
    if plot:
        ax = sm_ts_plot.plot(color='b')
        if len(iwmi_crop_plot) != 0:
            iwmi_crop_plot.plot(color='r', ax=ax)
        if len(iwmi_bare_plot) != 0:
            iwmi_bare_plot.plot(color='g', ax=ax)
        plt.legend([sm_dataset+' ts', 'iwmi crop, index '+str(index),
                    'iwmi bare, index '+str(index)])
        if sm_dataset in ['ers', 'ascat']:
            plt.ylabel('degree of saturation [%]')
            if corr_crop_bare is not None:
                plt.title('corr_bare_crop ='+str(round(corr_crop_bare, 3)))
            plt.ylim([0, 140])
        else:
            plt.ylabel('volumetric soil moisture [m3/m3]')
            if corr_bare is not None and corr_crop is not None:
                plt.title('corr_bare = '+str(round(corr_bare, 3)) +
                          '   corr_crop = '+str(round(corr_crop, 3)) +
                          '\n corr_bare_crop = ' +
                          str(round(corr_crop_bare, 3)))
            elif corr_bare is not None:
                plt.title('corr_bare = '+str(round(corr_bare, 3)))
            elif corr_crop is not None:
                plt.title('corr_crop = '+str(round(corr_crop, 3)))
                # NOTE(review): the y-limit is only set in this branch;
                # it may have been meant for the whole else-block.
                plt.ylim([0, 100])
        plt.grid()
        plt.savefig(os.path.join(root.x, 'staff', 'ipfeil', 'iwmi_plots',
                                 sm_dataset,
                                 sm_dataset+'_cdf_'+str(index)+'.png'))
        plt.clf()

    return iwmi_bare_plot, iwmi_crop_plot, sm_ts_plot
Example #19
0
def start_pred(paths, region, pred_date, vi_str='NDVI',
               t_val='SWI_040', monthly=False, spatial_res=0.1):
    """
    Run the Zribi-type VI simulation for every grid point of a region.

    For each grid point inside the bounding box of `region` (land cover
    classes -66, -46, -36 and 0 are skipped) SWI and VI are read, cut at
    `pred_date`, rescaled with ``rescale_peng`` and passed to ``zribi_kd``
    and ``zribi_sim``. The three result collections are saved as .npy files.

    Parameters
    ----------
    paths : dict
        Dataset paths; the keys 'SWI', 'NDVI', 'lc' and `vi_str` are used.
    region : object
        Region passed to ``Shape`` to obtain the bounding box.
    pred_date : object
        Upper bound of the time slice used for SWI and VI.
    vi_str : str, optional
        Vegetation index to read, default 'NDVI'.
    t_val : str, optional
        SWI variable to read, default 'SWI_040'.
    monthly : bool, optional
        If True, SWI and VI are resampled to monthly means.
    spatial_res : {0.1, 500}, optional
        Selects the grid: 0.1 uses the coordinates of the SWI file,
        500 those of the NDVI file.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If `spatial_res` is neither 0.1 nor 500.
    """
    if spatial_res == 0.1:
        grid_key = 'SWI'
    elif spatial_res == 500:
        grid_key = 'NDVI'
    else:
        # previously this led to an undefined-variable error further down
        raise ValueError("spatial_res must be 0.1 or 500, got %r"
                         % (spatial_res,))
    with Dataset(paths[grid_key], 'r') as ncfile:
        res_lons = ncfile.variables['lon'][:]
        res_lats = ncfile.variables['lat'][:]

    with Dataset(paths['lc']) as ncfile:
        lccs = ncfile.variables['lccs_class'][:]
        lc_lons = ncfile.variables['lon'][:]
        lc_lats = ncfile.variables['lat'][:]

    # districts
    # NOTE(review): user-specific, hard-coded shapefile location
    shapefile = os.path.join('C:\\', 'Users', 'i.pfeil', 'Documents',
                             '0_IWMI_DATASETS', 'shapefiles', 'IND_adm',
                             'IND_adm2')

    shpfile = Shape(region, shapefile=shapefile)
    lon_min, lat_min, lon_max, lat_max = shpfile.bbox
    lons = res_lons[np.where((res_lons >= lon_min) & (res_lons <= lon_max))]
    lats = res_lats[np.where((res_lats >= lat_min) & (res_lats <= lat_max))]

    start_date = datetime(2007, 7, 1)
    end_date = datetime(2015, 7, 1)

    # loop-invariant settings
    swi_path = paths['SWI']
    vi_path = paths[vi_str]
    swi_list = [t_val]
    # urban | water | snow and ice | no data
    masked_classes = (-66, -46, -36, 0)

    results = []
    results2 = []
    results3 = []
    for lon in lons:
        for lat in lats:
            # print calls give the same output under Python 2 and 3
            print('%s %s' % (lon, lat))
            if round(lon, 2) == 76.35 and round(lat, 2) == 19.55:
                # debug marker for one specific grid point
                print('danger')

            nearest_lon = find_nearest(lc_lons, lon)
            nearest_lat = find_nearest(lc_lats, lat)
            lc = lccs[nearest_lat, nearest_lon]

            if lc in masked_classes:
                print('lc mask')
                continue

            swi_df = read_ts(swi_path, lon=lon, lat=lat, params=swi_list,
                             start_date=start_date, end_date=end_date)
            vi_all = read_ts(vi_path, lon=lon, lat=lat, params=vi_str,
                             start_date=start_date, end_date=end_date)

            # only data up to pred_date are used
            vi = vi_all[:pred_date]
            vi_min = np.nanmin(vi)
            vi_max = np.nanmax(vi)
            vi = rescale_peng(vi, vi_min, vi_max)

            swi_all = swi_df[t_val]
            swi = swi_all[:pred_date]
            swi = rescale_peng(swi, np.nanmin(swi), np.nanmax(swi))

            # resample monthly
            if monthly:
                swi = swi.resample("M").mean()
                vi = vi.resample("M").mean()

            # calculate differences between consecutive VI values; the
            # last entry has no successor and is set to NaN
            dvi = np.ediff1d(vi, to_end=np.nan)
            vi['D_VI'] = pd.Series(dvi, index=vi.index)
            matched_data = temp_match.matching(swi, vi)

            kd = zribi_kd(swi, vi, matched_data)
            results, results2, results3 = zribi_sim(lon, lat, swi, vi,
                                                    matched_data, kd,
                                                    vi_min, vi_max,
                                                    results=results,
                                                    results2=results2,
                                                    results3=results3)

    # NOTE(review): user-specific, hard-coded output directory
    np.save('C:\\Users\\i.pfeil\\Desktop\\veg_prediction\\results.npy', results)
    np.save('C:\\Users\\i.pfeil\\Desktop\\veg_prediction\\results2.npy', results2)
    np.save('C:\\Users\\i.pfeil\\Desktop\\veg_prediction\\results3.npy', results3)
Example #20
0
                                                      mask_frozen_prob=5,
                                                      mask_snow_prob=5)
        

        # drop nan values before doing any matching
        ascat_time_series.data = ascat_time_series.data.dropna()
        
        ISMN_time_series.data = ISMN_time_series.data.dropna()
        
        # rename the soil moisture column in ISMN_time_series.data to insitu_sm
        # to clearly differentiate the time series when they are plotted together
        ISMN_time_series.data.rename(columns={'soil moisture':label_insitu}, inplace=True)
        
        # get ISMN data that was observerd within +- 1 hour(1/24. day) of the ASCAT observation
        # do not include those indexes where no observation was found
        matched_data = temp_match.matching(ascat_time_series.data, ISMN_time_series.data,
                                                window=1 / 24.)
        # matched ISMN data is now a dataframe with the same datetime index
        # as ascat_time_series.data and the nearest insitu observation      
        
        # continue only with relevant columns
        matched_data = matched_data[[label_ascat, label_insitu]]
        
        # the plot shows that ISMN and ASCAT are observed in different units
        matched_data.plot(figsize=(15, 5), secondary_y=[label_ascat],
                          title='temporally merged data')
        plt.show()
        
        # this takes the matched_data DataFrame and scales all columns to the 
        # column with the given reference_index, in this case in situ 
        scaled_data = scaling.scale(matched_data, method='lin_cdf_match',
                                         reference_index=1)
Example #21
0
def validate(params,
             timespan=('2009-01', '2009-12'), gpi=None, rescaling=None,
             y_axis_range=None):
    """
    Optimise vegetation water content 'm_veg' and soil moisture 'm_soil'
    for every observation of a resampled time series and compare the
    optimised soil moisture with in situ and GLDAS soil moisture.

    The comparison is drawn as a time series plot (with a table of the
    pairwise metrics, a table of the triple collocation errors and a text
    box listing the model parameters) and as a scatter matrix.

    Parameters
    ----------
    params : dict
        Model parameters. The keys 'sand', 'clay', 'temp', 's_vol', 'f_rms',
        'm_veg_x0' and 'm_soil_x0' are required. 'm_veg_x0' and 'm_soil_x0'
        are the start values of the optimisation; all other entries are
        passed on unchanged to the cost function `sig_sqr_diff`.
        The dictionary itself is never modified.
    timespan : tuple, optional
        (start, end) labels used to slice the time series to analyze
    gpi : int, optional
        Grid point index. If specified, it will read data from datapool
        (via `read_resam`); otherwise the sample file
        'data/2011528_2009.csv' next to this module is used.
    rescaling : string, optional
        rescaling method, one of 'min_max', 'linreg', 'mean_std' and 'lin_cdf_match'
        Default: None
        insitu is the reference to which is scaled
    y_axis_range : tuple, optional
        specify (min, max) of y axis

    Returns
    -------
    df : pandas.DataFrame
        Optimised 'm_veg' and 'm_soil' for every observation. Rows for
        which the optimisation did not succeed stay NaN.

    Raises
    ------
    AssertionError
        If a required parameter is missing. The exception type is kept for
        backward compatibility, but it is raised explicitly so the check is
        not stripped when Python runs with -O.
    """

    # raw strings: '\c' is not a valid escape sequence in a normal literal
    unit_dict = {'freq': 'GHz',
                 'sand': '',
                 'clay': '',
                 'temp': r'$^\circ$C',
                 'eps': '',
                 'theta': r'$^\circ$',
                 'f_rms': '',
                 'sig_bare': 'dB',
                 'm_soil': '%',
                 'm_veg': '%',
                 'm_soil_x0': '%',
                 'm_veg_x0': '%',
                 's_vol': '$m^3ha^{-1}$',
                 'sig_canopy': 'dB',
                 'sig_for': 'dB',
                 'sig_floor': 'dB',
                 'polarization': ''}

    param_should = ['sand', 'clay', 'temp',
                    's_vol', 'f_rms',
                    'm_veg_x0', 'm_soil_x0']

    missing = [param for param in param_should if param not in params]
    if missing:
        raise AssertionError(
            'missing model parameter(s): %s' % ', '.join(missing))

    # The start values must not reach the cost function, so they are split
    # off a shallow copy. The caller's dictionary stays untouched even if
    # something further down raises.
    model_params = dict(params)
    m_veg_x0 = model_params.pop('m_veg_x0')
    m_soil_x0 = model_params.pop('m_soil_x0')
    x0 = np.array([m_veg_x0, m_soil_x0])

    data_dir = os.path.join(
        os.path.split(os.path.abspath(__file__))[0], 'data')

    if gpi is None:
        # bundled sample time series (grid point 2011528)
        ts_resam = pd.read_csv(os.path.join(data_dir, '2011528_2009.csv'),
                               index_col=0, parse_dates=True)
    else:
        ts_resam = read_resam(gpi)
    ts_resam = ts_resam[timespan[0]:timespan[1]]

    # explicit float dtype gives a NaN-filled numeric frame without relying
    # on fillna() to downcast object columns
    df = pd.DataFrame(index=ts_resam.index, columns=['m_veg', 'm_soil'],
                      dtype=float)

    # optimise m_soil and m_veg
    for index, row in ts_resam.iterrows():
        ascat_inc = np.array(row[['incf', 'incm', 'inca']].tolist())
        ascat_sig = db2lin(np.array(row[['sigf', 'sigm', 'siga']].tolist()))

        res = minimize(sig_sqr_diff, x0,
                       args=(ascat_inc, ascat_sig, model_params, ''),
                       method='Nelder-Mead')

        if res['success']:
            # .loc writes into df itself; chained df[col][index] = ... may
            # write into a temporary copy
            df.loc[index, 'm_veg'] = res['x'][0]
            df.loc[index, 'm_soil'] = res['x'][1]

    ismn_file = os.path.join(data_dir, 'ARM_ARM_Larned_sm_0.050000_0.050000_Water-Matric-Potential-Sensor-229L-W_20090101_20140527.stm')
    ismn_data = ismn_readers.read_data(ismn_file)
    insitu = pd.DataFrame(ismn_data.data['soil moisture']).rename(
        columns={'soil moisture': 'insitu'})
    gldas = pd.read_csv(os.path.join(data_dir, 'GLDAS_737602.csv'),
                        parse_dates=True, index_col=0)
    gldas.rename(columns={'086_L1': 'gldas'}, inplace=True)
    gldas = pd.DataFrame(gldas['gldas']) / 100.0
    ascat = pd.DataFrame(df['m_soil']).rename(columns={'m_soil': 'ascat'})

    matched = temp_match.matching(ascat, insitu, gldas)

    if rescaling is not None:
        scaled = scaling.scale(matched, rescaling, reference_index=1)
    else:
        scaled = matched

    # insertion order has to match the order of row_labels below
    metrics = OrderedDict()
    metrics['bias'] = df_metrics.bias(scaled)
    metrics['pearson'] = df_metrics.pearsonr(scaled)
    metrics['spearman'] = df_metrics.spearmanr(scaled)
    metrics['ubrmsd'] = df_metrics.rmsd(scaled)
    metrics['std_ratio'] = df_std_ratio(scaled)
    tcol_error = df_metrics.tcol_error(scaled)._asdict()

    ts_title = "Soil moisture. "
    if rescaling is not None:
        ts_title = ' '.join([ts_title, 'Rescaling: %s.' % rescaling])
        rmsd_title = 'unbiased RMSD'
    else:
        ts_title = ' '.join([ts_title, 'No rescaling.'])
        rmsd_title = 'RMSD'

    axes = scaled.plot(title=ts_title, figsize=(18, 8))
    plt.legend()

    # these are matplotlib.patch.Patch properties
    props = dict(facecolor='white', alpha=0)

    columns = ('ascat-insitu', 'ascat-gldas', 'insitu-gldas')
    row_labels = ['bias', 'pearson R', 'spearman rho', rmsd_title,
                  'stddev ratio']
    cell_text = []
    for metric_values in metrics.values():
        # Results wrapped in a plain tuple carry the metric in their first
        # element (presumably a (value, p-value) pair - confirm against
        # df_metrics). The exact type comparison is deliberate: the metric
        # containers provide _asdict() like namedtuples, which are tuple
        # subclasses and would wrongly pass an isinstance() check.
        if type(metric_values) is tuple:
            metric_values = metric_values[0]
        metric_values = metric_values._asdict()
        cell_text.append(["%.2f" % metric_values['ascat_and_insitu'],
                          "%.2f" % metric_values['ascat_and_gldas'],
                          "%.2f" % metric_values['insitu_and_gldas']])

    plt.table(
        cellText=cell_text,
        colLabels=columns,
        colWidths=[0.1, 0.1, 0.1],
        rowLabels=row_labels, loc='bottom',
        bbox=(0.2, -0.5, 0.5, 0.3))

    plt.table(
        cellText=[["%.2f" % tcol_error['ascat'],
                   "%.2f" % tcol_error['gldas'],
                   "%.2f" % tcol_error['insitu']]],
        colLabels=('ascat      ', 'gldas      ', 'insitu      '),
        colWidths=[0.1, 0.1, 0.1],
        rowLabels=['Triple collocation error'], loc='bottom',
        bbox=(0.2, -0.6, 0.5, 0.1))

    plt.subplots_adjust(left=0.08, bottom=0.35, right=0.85)
    plt.draw()

    if y_axis_range is not None:
        axes.set_ylim(y_axis_range)

    infotext = '\n'.join(
        '%s = %s %s' % (label, params[label], unit_dict[label])
        for label in sorted(param_should))

    # place the parameter text box to the right of the plot
    # (x = 1.03 in axes coordinates), aligned with its top edge
    axes.text(1.03, 1, infotext, transform=axes.transAxes, fontsize=12,
              verticalalignment='top', bbox=props)

    axes = scatter_matrix(scaled)
    axes.flat[0].figure.suptitle(ts_title)

    # draw a 1:1 line into every off-diagonal panel of the scatter matrix
    for j, ax in enumerate(axes.flatten()):
        if y_axis_range is not None:
            ax.set_xlim(y_axis_range)

        # in the flattened 3x3 matrix every 4th panel lies on the diagonal
        if j % (3 + 1) == 0:
            continue

        if y_axis_range is not None:
            ax.set_ylim(y_axis_range)
        min_x, max_x = ax.get_xlim()
        min_y, max_y = ax.get_ylim()
        # find minimum lower left coordinate and maximum upper right
        min_ll = min([min_x, min_y])
        max_ur = max([max_x, max_y])
        ax.plot([min_ll, max_ur], [min_ll, max_ur], '--', c='0.6')

    return df
Example #22
0
def optimise(params,
             timespan=('2009-01', '2009-12'),
             gpi=None,
             rescaling=None):
    """
    Optimise vegetation water content 'm_veg' and soil moisture 'm_soil'
    for every observation of a resampled time series and plot the
    optimised soil moisture against in situ and GLDAS soil moisture.

    The comparison is drawn as one time series subplot per data set (with
    a table of the pairwise metrics and a table of the triple collocation
    errors) and as a scatter matrix.

    Parameters
    ----------
    params : dict
        Model parameters. Must contain the start values 'm_veg_x0' and
        'm_soil_x0'; all other entries are passed on unchanged to the cost
        function `sig_sqr_diff` (the original documentation lists 'sand',
        'clay', 'f_rms', 'temp' and 's_vol'). The dictionary itself is
        never modified, so it can be reused for further calls.
    timespan : tuple, optional
        (start, end) labels used to slice the time series to analyze
    gpi : int, optional
        Grid point index. If specified, it will read data from datapool
        (via `read_resam`); otherwise 'data/2011528_2009.csv' relative to
        the current working directory is used.
    rescaling : string, optional
        Rescaling method handed to `scaling.scale`; the second matched
        column (in situ) is the reference. Default: None (no rescaling)

    Returns
    -------
    df : pandas.DataFrame
        Optimised 'm_veg' and 'm_soil' for every observation. Rows for
        which the optimisation did not succeed stay NaN.

    Raises
    ------
    KeyError
        If 'm_veg_x0' or 'm_soil_x0' is missing from `params`. This is
        checked before any data is read.
    """

    # The start values must not reach the cost function, so they are split
    # off a shallow copy instead of being popped from the caller's dict.
    model_params = dict(params)
    m_veg_x0 = model_params.pop('m_veg_x0')
    m_soil_x0 = model_params.pop('m_soil_x0')
    x0 = np.array([m_veg_x0, m_soil_x0])

    if gpi is None:
        # sample time series (grid point 2011528)
        ts_resam = pd.read_csv(os.path.join("data", "2011528_2009.csv"),
                               index_col=0,
                               parse_dates=True)
    else:
        ts_resam = read_resam(gpi)
    ts_resam = ts_resam[timespan[0]:timespan[1]]

    # explicit float dtype gives a NaN-filled numeric frame without relying
    # on fillna() to downcast object columns
    df = pd.DataFrame(index=ts_resam.index,
                      columns=['m_veg', 'm_soil'],
                      dtype=float)

    # optimise m_soil and m_veg
    for index, row in ts_resam.iterrows():
        ascat_inc = np.array(row[['incf', 'incm', 'inca']].tolist())
        ascat_sig = db2lin(np.array(row[['sigf', 'sigm', 'siga']].tolist()))

        res = minimize(sig_sqr_diff,
                       x0,
                       args=(ascat_inc, ascat_sig, model_params, ''),
                       method='Nelder-Mead')

        if res['success']:
            # .loc writes into df itself; chained df[col][index] = ... may
            # write into a temporary copy
            df.loc[index, 'm_veg'] = res['x'][0]
            df.loc[index, 'm_soil'] = res['x'][1]

    ismn_file = os.path.join(
        'data',
        'ARM_ARM_Larned_sm_0.050000_0.050000_Water-Matric-Potential-Sensor-229L-W_20090101_20140527.stm'
    )
    ismn_data = ismn_readers.read_data(ismn_file)
    insitu = pd.DataFrame(ismn_data.data['soil moisture']).rename(
        columns={'soil moisture': 'insitu'})
    gldas = pd.read_csv(os.path.join('data', 'GLDAS_737602.csv'),
                        parse_dates=True,
                        index_col=0)
    gldas.rename(columns={'086_L1': 'gldas'}, inplace=True)
    gldas = pd.DataFrame(gldas['gldas'])
    ascat = pd.DataFrame(df['m_soil']).rename(columns={'m_soil': 'ascat'})

    matched = temp_match.matching(ascat, insitu, gldas)

    if rescaling is not None:
        scaled = scaling.scale(matched, rescaling, reference_index=1)
    else:
        scaled = matched

    # insertion order has to match the order of row_labels below
    metrics = OrderedDict()
    metrics['bias'] = df_metrics.bias(scaled)
    metrics['pearson'] = df_metrics.pearsonr(scaled)
    metrics['kendall'] = df_metrics.kendalltau(scaled)
    metrics['ubrmsd'] = df_metrics.ubrmsd(scaled)
    metrics['var_ratio'] = df_var_ratio(scaled)
    tcol_error = df_metrics.tcol_error(scaled)._asdict()

    ts_title = "Soil moisture. "
    if rescaling is not None:
        ts_title = ' '.join([ts_title, 'Rescaling: %s.' % rescaling])
    else:
        ts_title = ' '.join([ts_title, 'No rescaling.'])

    scaled.plot(subplots=True, title=ts_title, figsize=(18, 8))

    columns = ('ascat-insitu', 'ascat-gldas', 'insitu-gldas')
    row_labels = [
        'bias', 'pearson R', 'kendall tau', 'unbiased RMSD', 'variance ratio'
    ]
    cell_text = []
    for metric_values in metrics.values():
        # Results wrapped in a plain tuple carry the metric in their first
        # element (presumably a (value, p-value) pair - confirm against
        # df_metrics). The exact type comparison is deliberate: the metric
        # containers provide _asdict() like namedtuples, which are tuple
        # subclasses and would wrongly pass an isinstance() check.
        if type(metric_values) is tuple:
            metric_values = metric_values[0]
        metric_values = metric_values._asdict()
        cell_text.append([
            "%.2f" % metric_values['ascat_and_insitu'],
            "%.2f" % metric_values['ascat_and_gldas'],
            "%.2f" % metric_values['insitu_and_gldas']
        ])

    plt.table(cellText=cell_text,
              colLabels=columns,
              colWidths=[0.1, 0.1, 0.1],
              rowLabels=row_labels,
              loc='bottom',
              bbox=(0.2, -1.25, 0.5, 0.8))

    plt.table(cellText=[[
        "%.2f" % tcol_error['ascat'],
        "%.2f" % tcol_error['gldas'],
        "%.2f" % tcol_error['insitu']
    ]],
              colLabels=('ascat', 'gldas', 'insitu'),
              colWidths=[0.1, 0.1, 0.1],
              rowLabels=['Triple collocation error'],
              loc='bottom',
              bbox=(0.2, -1.65, 0.5, 0.3))

    plt.subplots_adjust(left=0.08, bottom=0.35)

    axes = scatter_matrix(scaled)
    axes.flat[0].figure.suptitle(ts_title)

    # only draw 1:1 line if scaling was applied
    if rescaling is not None:
        for j, ax in enumerate(axes.flatten()):
            # in the flattened 3x3 matrix every 4th panel lies on the
            # diagonal; those panels get no 1:1 line
            if j % (3 + 1) == 0:
                continue
            min_x, max_x = ax.get_xlim()
            min_y, max_y = ax.get_ylim()
            # find minimum lower left coordinate and maximum upper right
            min_ll = min([min_x, min_y])
            max_ur = max([max_x, max_y])
            ax.plot([min_ll, max_ur], [min_ll, max_ur], '--', c='0.6')

    return df
Example #23
0
        # focus only on the relevant variable
        ascat_time_series.data = ascat_time_series.data[label_ascat]

        # drop nan values before doing any matching
        ascat_time_series.data = ascat_time_series.data.dropna()

        ISMN_time_series.data = ISMN_time_series.data.dropna()
        
        # rename the soil moisture column in ISMN_time_series.data to insitu_sm
        # to clearly differentiate the time series when they are plotted together
        ISMN_time_series.data.rename(columns={'soil moisture':label_insitu}, inplace=True)
        
        # get ISMN data that was observed within +- 1 hour (1/24. day) of the ASCAT observation
        # do not include those indexes where no observation was found
        matched_data = temp_match.matching(ascat_time_series.data, ISMN_time_series.data,
                                                window=1 / 24.)
        # matched ISMN data is now a dataframe with the same datetime index
        # as ascat_time_series.data and the nearest insitu observation      
        
        # continue only with relevant columns
        matched_data = matched_data[[label_ascat, label_insitu]]
        
        # the plot shows that ISMN and ASCAT are observed in different units
        matched_data.plot(figsize=(15, 5), secondary_y=[label_ascat],
                          title='temporally merged data')
        plt.show()
        
        # this takes the matched_data DataFrame and scales all columns to the 
        # column with the given reference_index, in this case in situ 
        scaled_data = scaling.scale(matched_data, method='lin_cdf_match',
                                         reference_index=1)
def optimise(params,
             timespan=('2009-01', '2009-12'), gpi=None, rescaling=None):
    """
    Optimise vegetation water content 'm_veg' and soil moisture 'm_soil'
    for every observation of a resampled time series and plot the
    optimised soil moisture against in situ and GLDAS soil moisture.

    The comparison is drawn as one time series subplot per data set (with
    a table of the pairwise metrics and a table of the triple collocation
    errors) and as a scatter matrix.

    Parameters
    ----------
    params : dict
        Model parameters. Must contain the start values 'm_veg_x0' and
        'm_soil_x0'; all other entries are passed on unchanged to the cost
        function `sig_sqr_diff` (the original documentation lists 'sand',
        'clay', 'f_rms', 'temp' and 's_vol'). The dictionary itself is
        never modified, so it can be reused for further calls.
    timespan : tuple, optional
        (start, end) labels used to slice the time series to analyze
    gpi : int, optional
        Grid point index. If specified, it will read data from datapool
        (via `read_resam`); otherwise 'data/2011528_2009.csv' relative to
        the current working directory is used.
    rescaling : string, optional
        Rescaling method handed to `scaling.scale`; the second matched
        column (in situ) is the reference. Default: None (no rescaling)

    Returns
    -------
    df : pandas.DataFrame
        Optimised 'm_veg' and 'm_soil' for every observation. Rows for
        which the optimisation did not succeed stay NaN.

    Raises
    ------
    KeyError
        If 'm_veg_x0' or 'm_soil_x0' is missing from `params`. This is
        checked before any data is read.
    """

    # The start values must not reach the cost function, so they are split
    # off a shallow copy instead of being popped from the caller's dict.
    model_params = dict(params)
    m_veg_x0 = model_params.pop('m_veg_x0')
    m_soil_x0 = model_params.pop('m_soil_x0')
    x0 = np.array([m_veg_x0, m_soil_x0])

    if gpi is None:
        # sample time series (grid point 2011528)
        ts_resam = pd.read_csv(os.path.join("data", "2011528_2009.csv"),
                               index_col=0, parse_dates=True)
    else:
        ts_resam = read_resam(gpi)
    ts_resam = ts_resam[timespan[0]:timespan[1]]

    # explicit float dtype gives a NaN-filled numeric frame without relying
    # on fillna() to downcast object columns
    df = pd.DataFrame(index=ts_resam.index, columns=['m_veg', 'm_soil'],
                      dtype=float)

    # optimise m_soil and m_veg
    for index, row in ts_resam.iterrows():
        ascat_inc = np.array(row[['incf', 'incm', 'inca']].tolist())
        ascat_sig = db2lin(np.array(row[['sigf', 'sigm', 'siga']].tolist()))

        res = minimize(sig_sqr_diff, x0,
                       args=(ascat_inc, ascat_sig, model_params, ''),
                       method='Nelder-Mead')

        if res['success']:
            # .loc writes into df itself; chained df[col][index] = ... may
            # write into a temporary copy
            df.loc[index, 'm_veg'] = res['x'][0]
            df.loc[index, 'm_soil'] = res['x'][1]

    ismn_file = os.path.join('data', 'ARM_ARM_Larned_sm_0.050000_0.050000_Water-Matric-Potential-Sensor-229L-W_20090101_20140527.stm')
    ismn_data = ismn_readers.read_data(ismn_file)
    insitu = pd.DataFrame(ismn_data.data['soil moisture']).rename(
        columns={'soil moisture': 'insitu'})
    gldas = pd.read_csv(os.path.join('data', 'GLDAS_737602.csv'),
                        parse_dates=True, index_col=0)
    gldas.rename(columns={'086_L1': 'gldas'}, inplace=True)
    gldas = pd.DataFrame(gldas['gldas'])
    ascat = pd.DataFrame(df['m_soil']).rename(columns={'m_soil': 'ascat'})

    matched = temp_match.matching(ascat, insitu, gldas)

    if rescaling is not None:
        scaled = scaling.scale(matched, rescaling, reference_index=1)
    else:
        scaled = matched

    # insertion order has to match the order of row_labels below
    metrics = OrderedDict()
    metrics['bias'] = df_metrics.bias(scaled)
    metrics['pearson'] = df_metrics.pearsonr(scaled)
    metrics['kendall'] = df_metrics.kendalltau(scaled)
    metrics['ubrmsd'] = df_metrics.ubrmsd(scaled)
    metrics['var_ratio'] = df_var_ratio(scaled)
    tcol_error = df_metrics.tcol_error(scaled)._asdict()

    ts_title = "Soil moisture. "
    if rescaling is not None:
        ts_title = ' '.join([ts_title, 'Rescaling: %s.' % rescaling])
    else:
        ts_title = ' '.join([ts_title, 'No rescaling.'])

    scaled.plot(subplots=True, title=ts_title, figsize=(18, 8))

    columns = ('ascat-insitu', 'ascat-gldas', 'insitu-gldas')
    row_labels = ['bias', 'pearson R', 'kendall tau', 'unbiased RMSD',
                  'variance ratio']
    cell_text = []
    for metric_values in metrics.values():
        # Results wrapped in a plain tuple carry the metric in their first
        # element (presumably a (value, p-value) pair - confirm against
        # df_metrics). The exact type comparison is deliberate: the metric
        # containers provide _asdict() like namedtuples, which are tuple
        # subclasses and would wrongly pass an isinstance() check.
        if type(metric_values) is tuple:
            metric_values = metric_values[0]
        metric_values = metric_values._asdict()
        cell_text.append(["%.2f" % metric_values['ascat_and_insitu'],
                          "%.2f" % metric_values['ascat_and_gldas'],
                          "%.2f" % metric_values['insitu_and_gldas']])

    plt.table(
        cellText=cell_text,
        colLabels=columns,
        colWidths=[0.1, 0.1, 0.1],
        rowLabels=row_labels, loc='bottom',
        bbox=(0.2, -1.25, 0.5, 0.8))

    plt.table(
        cellText=[["%.2f" % tcol_error['ascat'],
                   "%.2f" % tcol_error['gldas'],
                   "%.2f" % tcol_error['insitu']]],
        colLabels=('ascat', 'gldas', 'insitu'),
        colWidths=[0.1, 0.1, 0.1],
        rowLabels=['Triple collocation error'], loc='bottom',
        bbox=(0.2, -1.65, 0.5, 0.3))

    plt.subplots_adjust(left=0.08, bottom=0.35)

    axes = scatter_matrix(scaled)
    axes.flat[0].figure.suptitle(ts_title)

    # only draw 1:1 line if scaling was applied
    if rescaling is not None:
        for j, ax in enumerate(axes.flatten()):
            # in the flattened 3x3 matrix every 4th panel lies on the
            # diagonal; those panels get no 1:1 line
            if j % (3 + 1) == 0:
                continue
            min_x, max_x = ax.get_xlim()
            min_y, max_y = ax.get_ylim()
            # find minimum lower left coordinate and maximum upper right
            min_ll = min([min_x, min_y])
            max_ur = max([max_x, max_y])
            ax.plot([min_ll, max_ur], [min_ll, max_ur], '--', c='0.6')

    return df