Example #1
0
def calc_rho(ascat_ssm, FP_df, hoal_df):
    """
    Match, scale and plot ASCAT, Parrot (FP) and HOAL soil moisture and
    compute Spearman correlations against the Parrot sensor.

    Two figures are shown: the temporally matched data, and the scaled data
    without the temperature, radiation and merge-key columns.

    Parameters
    ----------
    ascat_ssm : pandas.DataFrame
        ASCAT data with a column 'ssm_ascat'. NOTE: this column is
        multiplied by the porosity (0.54) in place, i.e. the caller's
        object is modified and calling the function twice scales twice.
    FP_df : pandas.DataFrame
        Parrot sensor data with a column 'Parrot_vwc'.
    hoal_df : pandas.DataFrame
        HOAL in-situ data with a column 'HOAL_sm0.05'.

    Returns
    -------
    ascat_rho, hoal_rho_sm
        Results of ``metrics.spearmanr`` for Parrot vs. ASCAT and
        Parrot vs. HOAL (previously computed but discarded).
    """
    # multiply ASCAT with porosity (0.54) to get same units
    ascat_ssm['ssm_ascat'] = ascat_ssm['ssm_ascat'] * 0.54

    matched_data = matching(ascat_ssm, FP_df['Parrot_vwc'],
                            hoal_df['HOAL_sm0.05'])
    matched_data.plot()
    plt.title('Matched data: ASCAT, FP, HOAL')
    plt.show()

    data_together = scale(matched_data)

    # The last three matched rows are left out of the correlation
    # (the reason is not documented in the original code).
    parrot = data_together['Parrot_vwc'].iloc[:-3]
    ascat_rho = metrics.spearmanr(parrot,
                                  data_together['ssm_ascat'].iloc[:-3])
    hoal_rho_sm = metrics.spearmanr(parrot,
                                    data_together['HOAL_sm0.05'].iloc[:-3])

    exclude = ['HOAL_ts0.05', 'air_temperature_celsius', 'par_umole_m2s',
               'merge_key']
    # .loc replaces the removed DataFrame.ix indexer; the selection is
    # purely label based, so the result is the same.
    data_together.loc[:, data_together.columns.difference(exclude)].plot()
    plt.title('Satellite and in-situ soil moisture, HOAL Petzenkirchen, station 22',
              fontsize=24)
    plt.ylabel('Volumetric Water Content [%]', fontsize=20)
    plt.tick_params(axis='both', which='major', labelsize=18)
    plt.ylim([0, 60])
    plt.show()

    return ascat_rho, hoal_rho_sm
Example #2
0
def calc_rho(ascat_ssm, FP_df, hoal_df):
    """
    Match, scale and plot ASCAT, Parrot (FP) and HOAL soil moisture and
    compute Spearman correlations against the Parrot sensor.

    Two figures are shown: the temporally matched data, and the scaled data
    without the temperature, radiation and merge-key columns.

    Parameters
    ----------
    ascat_ssm : pandas.DataFrame
        ASCAT data with a column 'ssm_ascat'. NOTE: this column is
        multiplied by the porosity (0.54) in place, i.e. the caller's
        object is modified and calling the function twice scales twice.
    FP_df : pandas.DataFrame
        Parrot sensor data with a column 'Parrot_vwc'.
    hoal_df : pandas.DataFrame
        HOAL in-situ data with a column 'HOAL_sm0.05'.

    Returns
    -------
    ascat_rho, hoal_rho_sm
        Results of ``metrics.spearmanr`` for Parrot vs. ASCAT and
        Parrot vs. HOAL (previously computed but discarded).
    """
    # multiply ASCAT with porosity (0.54) to get same units
    ascat_ssm['ssm_ascat'] = ascat_ssm['ssm_ascat'] * 0.54

    matched_data = matching(ascat_ssm, FP_df['Parrot_vwc'],
                            hoal_df['HOAL_sm0.05'])
    matched_data.plot()
    plt.title('Matched data: ASCAT, FP, HOAL')
    plt.show()

    data_together = scale(matched_data)

    # The last three matched rows are left out of the correlation
    # (the reason is not documented in the original code).
    parrot = data_together['Parrot_vwc'].iloc[:-3]
    ascat_rho = metrics.spearmanr(parrot,
                                  data_together['ssm_ascat'].iloc[:-3])
    hoal_rho_sm = metrics.spearmanr(parrot,
                                    data_together['HOAL_sm0.05'].iloc[:-3])

    exclude = [
        'HOAL_ts0.05', 'air_temperature_celsius', 'par_umole_m2s', 'merge_key'
    ]
    # .loc replaces the removed DataFrame.ix indexer; the selection is
    # purely label based, so the result is the same.
    data_together.loc[:, data_together.columns.difference(exclude)].plot()
    plt.title(
        'Satellite and in-situ soil moisture, HOAL Petzenkirchen, station 22',
        fontsize=24)
    plt.ylabel('Volumetric Water Content [%]', fontsize=20)
    plt.tick_params(axis='both', which='major', labelsize=18)
    plt.ylim([0, 60])
    plt.show()

    return ascat_rho, hoal_rho_sm
Example #3
0
def calc_rho(ascat_ssm, FP_df, hoal_df, hoal_raw):
    """
    Match ASCAT, Parrot (FP), HOAL and raw HOAL data, print and plot the
    Spearman correlations against the Parrot sensor.

    Three matchings are built (ASCAT as first argument, FP as first
    argument, and FP with HOAL only); the correlations are computed on the
    first one and all three are plotted.

    Parameters
    ----------
    ascat_ssm : pandas.DataFrame
        ASCAT data with a column 'ssm_ascat'. NOTE: this column is
        multiplied by the porosity (0.54) in place, i.e. the caller's
        object is modified.
    FP_df, hoal_df, hoal_raw : pandas.DataFrame
        Parrot, HOAL and raw HOAL data. After matching, the columns
        'Parrot_vwc', 'HOAL_sm0.05', 'HOAL_ts0.05',
        'air_temperature_celsius' and 'HOAL_raw_sm1' must be present.

    Returns
    -------
    ascat_rho, hoal_rho_sm, hoal_rho_ts, hoal_raw_rho_sm
        Results of ``metrics.spearmanr``; element 0 of each is used as rho
        in the plot title.
    """
    # multiply ASCAT with porosity (0.54) to get same units
    ascat_ssm['ssm_ascat'] = ascat_ssm['ssm_ascat'] * 0.54

    # Which order to match in. Assuming matching() follows the pytesmo
    # convention matching(reference, *others), the first argument is the
    # reference - TODO confirm against the imported matching().
    data_together1 = matching(FP_df, ascat_ssm, hoal_df, hoal_raw)
    data_together = matching(ascat_ssm, FP_df, hoal_df, hoal_raw)
    data_together2 = matching(FP_df, hoal_df)
    # The labels used to be swapped relative to the frames they described.
    print('ref: ASCAT')
    print(data_together)
    print('ref: FP')
    print(data_together1)

    # The last three matched rows are left out of every correlation
    # (the reason is not documented in the original code).
    parrot = data_together['Parrot_vwc'].iloc[:-3]
    ascat_rho = metrics.spearmanr(parrot,
                                  data_together['ssm_ascat'].iloc[:-3])
    hoal_rho_sm = metrics.spearmanr(parrot,
                                    data_together['HOAL_sm0.05'].iloc[:-3])
    hoal_rho_ts = metrics.spearmanr(
        data_together['air_temperature_celsius'].iloc[:-3],
        data_together['HOAL_ts0.05'].iloc[:-3])
    hoal_raw_rho_sm = metrics.spearmanr(
        parrot, data_together['HOAL_raw_sm1'].iloc[:-3])

    # Single-argument print() calls behave the same on Python 2 and 3.
    print(ascat_rho)
    print(hoal_rho_sm)
    print(hoal_rho_ts)
    print(hoal_raw_rho_sm)

    exclude = [
        'HOAL_ts0.05', 'air_temperature_celsius', 'par_umole_m2s', 'merge_key'
    ]
    # .loc replaces the removed DataFrame.ix indexer (label based selection).
    data_together.loc[:, data_together.columns.difference(exclude)].plot()
    plt.title(
        'Satellite and in-situ soil moisture, HOAL Petzenkirchen, station 22' +
        '\n rho_ASCAT_Parrot: ' + str(np.round(ascat_rho[0], 3)) +
        ', rho_HOAL_Parrot: ' + str(np.round(hoal_rho_sm[0], 3)) +
        ', rho_HOAL_raw_Parrot: ' + str(np.round(hoal_raw_rho_sm[0], 3)))
    plt.ylabel('Volumetric Water Content [%]')
    plt.show()

    data_together1.loc[:, data_together1.columns.difference(exclude)].plot()
    plt.show()
    data_together2.loc[:, data_together2.columns.difference(exclude)].plot()
    plt.show()

    return ascat_rho, hoal_rho_sm, hoal_rho_ts, hoal_raw_rho_sm
Example #4
0
def calc_rho(ascat_ssm, FP_df, hoal_df, hoal_raw):
    """
    Match ASCAT, Parrot (FP), HOAL and raw HOAL data, print and plot the
    Spearman correlations against the Parrot sensor.

    Three matchings are built (ASCAT as first argument, FP as first
    argument, and FP with HOAL only); the correlations are computed on the
    first one and all three are plotted.

    Parameters
    ----------
    ascat_ssm : pandas.DataFrame
        ASCAT data with a column 'ssm_ascat'. NOTE: this column is
        multiplied by the porosity (0.54) in place, i.e. the caller's
        object is modified.
    FP_df, hoal_df, hoal_raw : pandas.DataFrame
        Parrot, HOAL and raw HOAL data. After matching, the columns
        'Parrot_vwc', 'HOAL_sm0.05', 'HOAL_ts0.05',
        'air_temperature_celsius' and 'HOAL_raw_sm1' must be present.

    Returns
    -------
    ascat_rho, hoal_rho_sm, hoal_rho_ts, hoal_raw_rho_sm
        Results of ``metrics.spearmanr``; element 0 of each is used as rho
        in the plot title.
    """
    # multiply ASCAT with porosity (0.54) to get same units
    ascat_ssm['ssm_ascat'] = ascat_ssm['ssm_ascat'] * 0.54

    # Which order to match in. Assuming matching() follows the pytesmo
    # convention matching(reference, *others), the first argument is the
    # reference - TODO confirm against the imported matching().
    data_together1 = matching(FP_df, ascat_ssm, hoal_df, hoal_raw)
    data_together = matching(ascat_ssm, FP_df, hoal_df, hoal_raw)
    data_together2 = matching(FP_df, hoal_df)
    # The labels used to be swapped relative to the frames they described.
    print('ref: ASCAT')
    print(data_together)
    print('ref: FP')
    print(data_together1)

    # The last three matched rows are left out of every correlation
    # (the reason is not documented in the original code).
    parrot = data_together['Parrot_vwc'].iloc[:-3]
    ascat_rho = metrics.spearmanr(parrot,
                                  data_together['ssm_ascat'].iloc[:-3])
    hoal_rho_sm = metrics.spearmanr(parrot,
                                    data_together['HOAL_sm0.05'].iloc[:-3])
    hoal_rho_ts = metrics.spearmanr(
        data_together['air_temperature_celsius'].iloc[:-3],
        data_together['HOAL_ts0.05'].iloc[:-3])
    hoal_raw_rho_sm = metrics.spearmanr(
        parrot, data_together['HOAL_raw_sm1'].iloc[:-3])

    # Single-argument print() calls behave the same on Python 2 and 3.
    print(ascat_rho)
    print(hoal_rho_sm)
    print(hoal_rho_ts)
    print(hoal_raw_rho_sm)

    exclude = ['HOAL_ts0.05', 'air_temperature_celsius', 'par_umole_m2s',
               'merge_key']
    # .loc replaces the removed DataFrame.ix indexer (label based selection).
    data_together.loc[:, data_together.columns.difference(exclude)].plot()
    plt.title('Satellite and in-situ soil moisture, HOAL Petzenkirchen, station 22' +
              '\n rho_ASCAT_Parrot: ' + str(np.round(ascat_rho[0], 3)) +
              ', rho_HOAL_Parrot: ' + str(np.round(hoal_rho_sm[0], 3)) +
              ', rho_HOAL_raw_Parrot: ' + str(np.round(hoal_raw_rho_sm[0], 3)))
    plt.ylabel('Volumetric Water Content [%]')
    plt.show()

    data_together1.loc[:, data_together1.columns.difference(exclude)].plot()
    plt.show()
    data_together2.loc[:, data_together2.columns.difference(exclude)].plot()
    plt.show()

    return ascat_rho, hoal_rho_sm, hoal_rho_ts, hoal_raw_rho_sm
Example #5
0
def calc_corr_IDSI_SWI(lat_min, lat_max, lon_min, lon_max,
                       swi_path=('C:\\Users\\s.hochstoger\\Desktop\\'
                                 '0_IWMI_DATASETS\\Dataset_stacks\\'
                                 'SWI_stack.nc'),
                       t_values=(1, 5, 10, 15, 20, 40, 60, 100)):
    """
    Correlate the (sign-flipped) drought series of an area with SWI
    anomalies for several T-values using Spearman's rho.

    For every T-value the T-value and the resulting ``rho p`` pair are
    printed.

    Parameters
    ----------
    lat_min, lat_max, lon_min, lon_max
        Bounding box, passed on to ``create_drought_dist`` and
        ``read_ts_area``.
    swi_path : str, optional
        Path of the SWI stack file. Defaults to the previously hard-coded
        location.
    t_values : iterable of int, optional
        SWI T-values to evaluate; the variable read is
        ``'SWI_' + str(t).zfill(3)``.

    Returns
    -------
    results : dict
        Maps each T-value to its ``(rho, p)`` tuple.
    """
    df = create_drought_dist(lat_min, lat_max, lon_min, lon_max)
    # The drought values are multiplied by -1 before correlating.
    df.drought = df.drought * (-1)
    drought = pd.DataFrame(df.drought)

    results = {}
    for t in t_values:
        print(t)
        df_swi = read_ts_area(swi_path, 'SWI_' + str(t).zfill(3),
                              lat_min, lat_max, lon_min, lon_max)
        anomaly_swi = anomaly(df_swi)

        # Only anomalies up to 2015-06-26 are used, divided by 100.
        df_anom = anomaly_swi.loc[:'20150626'] / 100
        match = temp_match.matching(drought, df_anom)
        s_rho, s_p = metrics.spearmanr(match.iloc[:, 0], match.iloc[:, 1])
        # Same text as the Python 2 statement "print s_rho, s_p".
        print('{} {}'.format(s_rho, s_p))
        results[t] = (s_rho, s_p)

    return results
Example #6
0
def calc_corr_IDSI_SWI(lat_min, lat_max, lon_min, lon_max,
                       swi_path=('C:\\Users\\s.hochstoger\\Desktop\\'
                                 '0_IWMI_DATASETS\\Dataset_stacks\\'
                                 'SWI_stack.nc'),
                       t_values=(1, 5, 10, 15, 20, 40, 60, 100)):
    """
    Correlate the (sign-flipped) drought series of an area with SWI
    anomalies for several T-values using Spearman's rho.

    For every T-value the T-value and the resulting ``rho p`` pair are
    printed.

    Parameters
    ----------
    lat_min, lat_max, lon_min, lon_max
        Bounding box, passed on to ``create_drought_dist`` and
        ``read_ts_area``.
    swi_path : str, optional
        Path of the SWI stack file. Defaults to the previously hard-coded
        location.
    t_values : iterable of int, optional
        SWI T-values to evaluate; the variable read is
        ``'SWI_' + str(t).zfill(3)``.

    Returns
    -------
    results : dict
        Maps each T-value to its ``(rho, p)`` tuple.
    """
    df = create_drought_dist(lat_min, lat_max, lon_min, lon_max)
    # The drought values are multiplied by -1 before correlating.
    df.drought = df.drought * (-1)
    drought = pd.DataFrame(df.drought)

    results = {}
    for t in t_values:
        print(t)
        df_swi = read_ts_area(
            swi_path,
            'SWI_' + str(t).zfill(3), lat_min, lat_max, lon_min, lon_max)
        anomaly_swi = anomaly(df_swi)

        # Only anomalies up to 2015-06-26 are used, divided by 100.
        df_anom = anomaly_swi.loc[:'20150626'] / 100
        match = temp_match.matching(drought, df_anom)
        s_rho, s_p = metrics.spearmanr(match.iloc[:, 0], match.iloc[:, 1])
        # Same text as the Python 2 statement "print s_rho, s_p".
        print('{} {}'.format(s_rho, s_p))
        results[t] = (s_rho, s_p)

    return results
Example #7
0
    def calc_metrics(self, data, gpi_info):
        """
        Fill a copy of the result template with the comparison statistics
        of one grid point.

        Parameters
        ----------
        data : pandas.DataFrame
            Matched data; the reference dataset is read from the column
            'ref', the dataset to compare against from the column named by
            ``self.other_name``.
        gpi_info : tuple
            of (gpi, lon, lat)

        Returns
        -------
        result
            Deep copy of ``self.result_template`` with element 0 of the
            'n_obs', 'gpi', 'lon' and 'lat' entries set. With at least 10
            rows in `data`, 'R', 'p_R', 'rho', 'p_rho', 'RMSD' and 'BIAS'
            are set as well, plus 'tau' and 'p_tau' if ``self.calc_tau``
            is true.

        Notes
        -----
        Kendall's tau is optional because the scipy implementation is very
        slow, which is problematic for global comparisons.
        """
        result = copy.deepcopy(self.result_template)
        n_samples = len(data)

        result["n_obs"][0] = n_samples
        result["gpi"][0] = gpi_info[0]
        result["lon"][0] = gpi_info[1]
        result["lat"][0] = gpi_info[2]

        # Statistics are only computed from 10 samples upwards.
        if n_samples >= 10:
            ref_values = data["ref"].values
            other_values = data[self.other_name].values

            pearson_r, pearson_p = metrics.pearsonr(ref_values, other_values)
            spearman_rho, spearman_p = metrics.spearmanr(ref_values,
                                                         other_values)
            rmsd_value = metrics.rmsd(ref_values, other_values)
            bias_value = metrics.bias(ref_values, other_values)

            result["R"][0] = pearson_r
            result["p_R"][0] = pearson_p
            result["rho"][0] = spearman_rho
            result["p_rho"][0] = spearman_p
            result["RMSD"][0] = rmsd_value
            result["BIAS"][0] = bias_value

            if self.calc_tau:
                kendall_tau, kendall_p = metrics.kendalltau(ref_values,
                                                            other_values)
                result["tau"][0] = kendall_tau
                result["p_tau"][0] = kendall_p

        return result
Example #8
0
    def calc_metrics(self, data, gpi_info):
        """
        Fill a copy of the result template with the comparison statistics
        of one grid point.

        Parameters
        ----------
        data : pandas.DataFrame
            Matched data; the reference dataset is read from the column
            'ref', the dataset to compare against from the column named by
            ``self.other_name``.
        gpi_info : tuple
            of (gpi, lon, lat)

        Returns
        -------
        result
            Deep copy of ``self.result_template`` with element 0 of the
            'n_obs', 'gpi', 'lon' and 'lat' entries set. With at least 10
            rows in `data`, 'R', 'p_R', 'rho', 'p_rho', 'RMSD' and 'BIAS'
            are set as well, plus 'tau' and 'p_tau' if ``self.calc_tau``
            is true.

        Notes
        -----
        Kendall's tau is optional because the scipy implementation is very
        slow, which is problematic for global comparisons.
        """
        result = copy.deepcopy(self.result_template)
        n_samples = len(data)

        result['n_obs'][0] = n_samples
        result['gpi'][0] = gpi_info[0]
        result['lon'][0] = gpi_info[1]
        result['lat'][0] = gpi_info[2]

        # Statistics are only computed from 10 samples upwards.
        if n_samples >= 10:
            ref_values = data['ref'].values
            other_values = data[self.other_name].values

            pearson_r, pearson_p = metrics.pearsonr(ref_values, other_values)
            spearman_rho, spearman_p = metrics.spearmanr(ref_values,
                                                         other_values)
            rmsd_value = metrics.rmsd(ref_values, other_values)
            bias_value = metrics.bias(ref_values, other_values)

            result['R'][0] = pearson_r
            result['p_R'][0] = pearson_p
            result['rho'][0] = spearman_rho
            result['p_rho'][0] = spearman_p
            result['RMSD'][0] = rmsd_value
            result['BIAS'][0] = bias_value

            if self.calc_tau:
                kendall_tau, kendall_p = metrics.kendalltau(ref_values,
                                                            other_values)
                result['tau'][0] = kendall_tau
                result['p_tau'][0] = kendall_p

        return result
Example #9
0
    def calc_metrics(self, data, gpi_info):
        """
        Fill a copy of the result template with the comparison statistics
        of one grid point.

        Parameters
        ----------
        data : pandas.DataFrame
            Matched data; the reference dataset is read from the column
            'ref', the dataset to compare against from the column 'other'.
        gpi_info : tuple
            of (gpi, lon, lat)

        Returns
        -------
        result
            Deep copy of ``self.result_template`` with element 0 of the
            'n_obs', 'gpi', 'lon' and 'lat' entries set. With at least 10
            rows in `data`, 'R', 'p_R', 'rho', 'p_rho', 'RMSD' and 'BIAS'
            are set as well.

        Notes
        -----
        Kendall's tau is not calculated at the moment because the scipy
        implementation is very slow, which is problematic for global
        comparisons.
        """
        result = copy.deepcopy(self.result_template)
        n_samples = len(data)

        result['n_obs'][0] = n_samples
        result['gpi'][0] = gpi_info[0]
        result['lon'][0] = gpi_info[1]
        result['lat'][0] = gpi_info[2]

        # Statistics are only computed from 10 samples upwards.
        if n_samples >= 10:
            ref_values = data['ref'].values
            other_values = data['other'].values

            pearson_r, pearson_p = metrics.pearsonr(ref_values, other_values)
            spearman_rho, spearman_p = metrics.spearmanr(ref_values,
                                                         other_values)
            rmsd_value = metrics.rmsd(ref_values, other_values)
            bias_value = metrics.bias(ref_values, other_values)

            result['R'][0] = pearson_r
            result['p_R'][0] = pearson_p
            result['rho'][0] = spearman_rho
            result['p_rho'][0] = spearman_p
            result['RMSD'][0] = rmsd_value
            result['BIAS'][0] = bias_value

        return result
Example #10
0
    def calc_metrics(self, data, gpi_info):
        """
        Compute Pearson and Spearman correlations per season for one grid
        point.

        Parameters
        ----------
        data : pandas.DataFrame
            Matched data; the reference dataset is read from the column
            'ref', the dataset to compare against from the column named by
            ``self.other_name``. The index must provide ``.month``.
        gpi_info : tuple
            Grid point info (i.e. gpi, lon, lat)

        Returns
        -------
        result
            Deep copy of ``self.result_template`` with element 0 of 'gpi',
            'lon' and 'lat' set, and for every season in ``self.seasons``
            with at least 10 samples the entries '<season>_n_obs',
            '<season>_R', '<season>_p_R', '<season>_rho' and
            '<season>_p_rho'.
        """
        result = copy.deepcopy(self.result_template)

        result['gpi'][0] = gpi_info[0]
        result['lon'][0] = gpi_info[1]
        result['lat'][0] = gpi_info[2]

        for season in self.seasons:
            # 'ALL' selects every sample, any other season is selected
            # through the month -> season lookup.
            if season == 'ALL':
                selected = np.ones(len(data), dtype=bool)
            else:
                selected = self.month_to_season[data.index.month] == season

            n_selected = selected.sum()
            if n_selected < 10:
                # too few samples, leave the template values untouched
                continue

            ref_values = data['ref'].values[selected]
            other_values = data[self.other_name].values[selected]
            pearson_r, pearson_p = metrics.pearsonr(ref_values, other_values)
            spearman_rho, spearman_p = metrics.spearmanr(ref_values,
                                                         other_values)

            season_values = (
                ('{:}_n_obs', n_selected),
                ('{:}_R', pearson_r),
                ('{:}_p_R', pearson_p),
                ('{:}_rho', spearman_rho),
                ('{:}_p_rho', spearman_p),
            )
            for key_template, value in season_values:
                result[key_template.format(season)][0] = value

        return result
Example #11
0
    def calc_metrics(self, data, gpi_info):
        """
        Compute Pearson and Spearman correlations per season for one grid
        point.

        Parameters
        ----------
        data : pandas.DataFrame
            Matched data; the reference dataset is read from the column
            'ref', the dataset to compare against from the column named by
            ``self.other_name``. The index must provide ``.month``.
        gpi_info : tuple
            Grid point info (i.e. gpi, lon, lat)

        Returns
        -------
        result
            Deep copy of ``self.result_template`` with element 0 of 'gpi',
            'lon' and 'lat' set, and for every season in ``self.seasons``
            with at least 10 samples the entries '<season>_n_obs',
            '<season>_R', '<season>_p_R', '<season>_rho' and
            '<season>_p_rho'.
        """
        result = copy.deepcopy(self.result_template)

        result['gpi'][0] = gpi_info[0]
        result['lon'][0] = gpi_info[1]
        result['lat'][0] = gpi_info[2]

        for season in self.seasons:
            # 'ALL' selects every sample, any other season is selected
            # through the month -> season lookup.
            if season == 'ALL':
                selected = np.ones(len(data), dtype=bool)
            else:
                selected = self.month_to_season[data.index.month] == season

            n_selected = selected.sum()
            if n_selected < 10:
                # too few samples, leave the template values untouched
                continue

            ref_values = data['ref'].values[selected]
            other_values = data[self.other_name].values[selected]
            pearson_r, pearson_p = metrics.pearsonr(ref_values, other_values)
            spearman_rho, spearman_p = metrics.spearmanr(ref_values,
                                                         other_values)

            season_values = (
                ('{:}_n_obs', n_selected),
                ('{:}_R', pearson_r),
                ('{:}_p_R', pearson_p),
                ('{:}_rho', spearman_rho),
                ('{:}_p_rho', spearman_p),
            )
            for key_template, value in season_values:
                result[key_template.format(season)][0] = value

        return result
Example #12
0
        plt.show()
        
        plt.scatter(matched_data[scaled_ascat_label].values,matched_data[label_insitu].values)
        plt.xlabel(scaled_ascat_label)
        plt.ylabel(label_insitu)
        plt.show()
        
        #calculate correlation coefficients, RMSD, bias, Nash Sutcliffe
        x, y = matched_data[scaled_ascat_label].values, matched_data[label_insitu].values
        
        print "ISMN time series:",ISMN_time_series
        print "compared to"
        print ascat_time_series
        print "Results:"
        print "Pearson's (R,p_value)", metrics.pearsonr(x, y)
        print "Spearman's (rho,p_value)", metrics.spearmanr(x, y)
        print "Kendalls's (tau,p_value)", metrics.kendalltau(x, y)
        print "RMSD", metrics.rmsd(x, y)
        print "Bias", metrics.bias(x, y)
        print "Nash Sutcliffe", metrics.nash_sutcliffe(x, y)
        
        
    i += 1
    
    #only show the first 2 stations, otherwise this program would run a long time
    #and produce a lot of plots
    if i >= 2:
        break    
    

Example #13
0
def compare_ssm_index(index, warp_gpi, sm_dataset, start=None, end=None,
                      weekly=True, plot=False):
    """
    Compare the IWMI bare and crop soil moisture series of one index with
    a satellite soil moisture series.

    The IWMI series are read through ``ssm_iwmi`` and the satellite series
    through ``ssm_TUW``. The IWMI series are rescaled against the satellite
    series with ``scaling.lin_cdf_match``. In weekly mode all series are
    first resampled to weekly means and Spearman correlations are computed
    on temporally matched data. Optionally a figure is saved to disk.

    Parameters
    ----------
    index
        Index passed to ``ssm_iwmi.IWMI_ts_index``; also used in the legend
        and in the file name of the saved figure.
    warp_gpi
        Grid point passed to the ``ssm_TUW`` readers.
    sm_dataset : str
        One of 'cci', 'ascat' or 'ers'.
    start, end : optional
        Only forwarded to ``ssm_TUW.read_CCI``; the ASCAT and ERS readers
        are called without a time window.
    weekly : bool, optional
        If true (default), work on weekly means and compute correlations.
    plot : bool, optional
        If true, plot the series and save the figure as
        ``<sm_dataset>_cdf_<index>.png``. Default: False.

    Returns
    -------
    iwmi_bare_plot, iwmi_crop_plot, sm_ts_plot
        The (rescaled) bare and crop series and the satellite series, on
        the weekly or the original time axis depending on `weekly`. They
        are returned whether or not `plot` is set; previously the function
        failed with a NameError at the return statement when `plot` was
        false.

    Raises
    ------
    ValueError
        If `sm_dataset` is not one of the supported names.
    """
    df = ssm_iwmi.IWMI_read_csv()
    iwmi_bare, iwmi_crop = ssm_iwmi.IWMI_ts_index(df, index)

    if sm_dataset == 'cci':
        sm_ts = ssm_TUW.read_CCI(warp_gpi, start, end)
    elif sm_dataset == 'ascat':
        sm_ts = ssm_TUW.read_ASCAT_ssm(warp_gpi)
    elif sm_dataset == 'ers':
        sm_ts = ssm_TUW.read_ERS_ssm(warp_gpi, args=['sm'], start=None,
                                     end=None)
    else:
        raise ValueError("sm_dataset must be 'cci', 'ascat' or 'ers', got "
                         + repr(sm_dataset))

    # None means "not computed"; replaces the former "'x' in locals()" tests.
    corr_bare = None
    corr_crop = None
    corr_crop_bare = None

    if weekly:
        # .resample(rule).mean() replaces the removed how='mean' keyword.
        iwmi_bare_weekly = iwmi_bare.resample('W').mean().dropna()
        iwmi_crop_weekly = iwmi_crop.resample('W').mean().dropna()
        sm_ts_weekly = sm_ts.resample('W').mean().dropna()

        # correlation of bare and crop
        if len(iwmi_bare) != 0 and len(iwmi_crop) != 0:
            # Kept from the original: relabels the first bare column to 1
            # before matching, presumably to avoid a column name clash
            # with the crop series - TODO confirm.
            iwmi_bare_weekly.columns.values[0] = 1
            match_bare_crop = temp_match.matching(iwmi_crop_weekly,
                                                  iwmi_bare_weekly)
            corr_crop_bare = metrics.spearmanr(match_bare_crop.iloc[:, 0],
                                               match_bare_crop.iloc[:, 1])[0]
            print(corr_crop_bare)

        if len(iwmi_bare) != 0:
            match_bare = temp_match.matching(sm_ts_weekly, iwmi_bare_weekly)
            iwmi_bare_weekly_match = match_bare.iloc[:, 1]
            sm_ts_weekly_bare = match_bare.iloc[:, 0]
            iwmi_bare_resc = scaling.lin_cdf_match(
                iwmi_bare_weekly.iloc[:, 0], sm_ts_weekly)
            iwmi_bare_weekly = pd.DataFrame(iwmi_bare_resc,
                                            index=iwmi_bare_weekly.index)
            corr_bare = metrics.spearmanr(iwmi_bare_weekly_match,
                                          sm_ts_weekly_bare)[0]

        if len(iwmi_crop) != 0:
            match_crop = temp_match.matching(sm_ts_weekly, iwmi_crop_weekly)
            iwmi_crop_weekly_match = match_crop.iloc[:, 1]
            sm_ts_weekly_crop = match_crop.iloc[:, 0]
            iwmi_crop_resc = scaling.lin_cdf_match(
                iwmi_crop_weekly.iloc[:, 0], sm_ts_weekly)
            iwmi_crop_weekly = pd.DataFrame(iwmi_crop_resc,
                                            index=iwmi_crop_weekly.index)
            corr_crop = metrics.spearmanr(iwmi_crop_weekly_match,
                                          sm_ts_weekly_crop)[0]

        sm_ts_plot = sm_ts_weekly
        iwmi_bare_plot = iwmi_bare_weekly
        iwmi_crop_plot = iwmi_crop_weekly

    else:
        if len(iwmi_bare) == 0:
            iwmi_bare_resc = iwmi_bare
        else:
            iwmi_bare_resc = scaling.lin_cdf_match(iwmi_bare, sm_ts)

        if len(iwmi_crop) == 0:
            iwmi_crop_resc = iwmi_crop
        else:
            iwmi_crop_resc = scaling.lin_cdf_match(iwmi_crop, sm_ts)

        sm_ts_plot = sm_ts
        iwmi_bare_plot = pd.DataFrame(iwmi_bare_resc, index=iwmi_bare.index)
        iwmi_crop_plot = pd.DataFrame(iwmi_crop_resc, index=iwmi_crop.index)

    if plot:
        ax = sm_ts_plot.plot(color='b')
        if len(iwmi_crop_plot) != 0:
            iwmi_crop_plot.plot(color='r', ax=ax)
        if len(iwmi_bare_plot) != 0:
            iwmi_bare_plot.plot(color='g', ax=ax)
        plt.legend([sm_dataset+' ts', 'iwmi crop, index '+str(index),
                    'iwmi bare, index '+str(index)])
        if sm_dataset in ['ers', 'ascat']:
            plt.ylabel('degree of saturation [%]')
            if corr_crop_bare is not None:
                plt.title('corr_bare_crop ='+str(round(corr_crop_bare, 3)))
            plt.ylim([0, 140])
        else:
            plt.ylabel('volumetric soil moisture [m3/m3]')
            if corr_bare is not None and corr_crop is not None:
                # Both series exist, so corr_crop_bare was computed as well.
                plt.title('corr_bare = '+str(round(corr_bare, 3)) +
                          '   corr_crop = '+str(round(corr_crop, 3)) +
                          '\n corr_bare_crop = '+str(round(corr_crop_bare, 3)))
            elif corr_bare is not None:
                plt.title('corr_bare = '+str(round(corr_bare, 3)))
            elif corr_crop is not None:
                plt.title('corr_crop = '+str(round(corr_crop, 3)))
                # NOTE(review): the y-limit is only applied in this branch,
                # as in the original - confirm that this is intended.
                plt.ylim([0, 100])
        plt.grid()
        plt.savefig(os.path.join(root.x, 'staff', 'ipfeil', 'iwmi_plots',
                                 sm_dataset,
                                 sm_dataset+'_cdf_'+str(index)+'.png'))
        plt.clf()

    return iwmi_bare_plot, iwmi_crop_plot, sm_ts_plot
Example #14
0
        scaled_data.plot(secondary_y=[label_ascat])
        plt.show()

        plt.scatter(matched_data[scaled_ascat_label].values,
                    matched_data[label_insitu].values)
        plt.xlabel(scaled_ascat_label)
        plt.ylabel(label_insitu)
        plt.show()

        #calculate correlation coefficients, RMSD, bias, Nash Sutcliffe
        x, y = matched_data[scaled_ascat_label].values, matched_data[
            label_insitu].values

        print "ISMN time series:", ISMN_time_series
        print "compared to"
        print ascat_time_series
        print "Results:"
        print "Pearson's (R,p_value)", metrics.pearsonr(x, y)
        print "Spearman's (rho,p_value)", metrics.spearmanr(x, y)
        print "Kendalls's (tau,p_value)", metrics.kendalltau(x, y)
        print "RMSD", metrics.rmsd(x, y)
        print "Bias", metrics.bias(x, y)
        print "Nash Sutcliffe", metrics.nash_sutcliffe(x, y)

    i += 1

    #only show the first 2 stations, otherwise this program would run a long time
    #and produce a lot of plots
    if i >= 2:
        break
Example #15
0
    def _calc_validation_metrics(self):
        """
        Calculate validation metrics between candidate and reference using
        pytesmo.

        Each metric is only calculated if its name is listed in
        ``self.metrics``. Supported names: 'bias', 'mad', 'rmsd', 'nrmsd',
        'PearsonR' (adds the rows 'PearsonR' and 'Pp') and 'SpearmanR'
        (adds the rows 'SpearmanR' and 'Sp'). If the candidate or the
        reference column of a subset is empty, NaN is stored instead.

        Returns
        -------
        df_validation_metrics: pd.DataFrame
            One row per metric value and one column per data subset:
            'group0' (``self.set0``), 'group1' (``self.set1``) and 'FRAME'
            (``self.setfull``).
        """
        results = pd.DataFrame()
        column_names = [self.candidate_name, self.reference_name]

        def lacks_data(frame):
            # True if the candidate or the reference column has no values.
            return any([frame[name].empty for name in column_names])

        simple_metrics = ('bias', 'mad', 'rmsd', 'nrmsd')
        # (requested name, row of the p-value, function in pytesmo.metrics)
        correlation_metrics = (('PearsonR', 'Pp', 'pearsonr'),
                               ('SpearmanR', 'Sp', 'spearmanr'))

        subsets = [self.set0, self.set1, self.setfull]
        for group_no, subset_data in enumerate(subsets):
            group = 'group%i' % group_no if group_no in [0, 1] else 'FRAME'

            for metric_name in simple_metrics:
                if metric_name not in self.metrics:
                    continue
                if lacks_data(subset_data):
                    value = np.nan
                else:
                    metric_func = getattr(metrics, metric_name)
                    value = metric_func(
                        subset_data[self.reference_name].values,
                        subset_data[self.candidate_name].values)
                results.at[metric_name, '%s' % group] = value

            for metric_name, p_name, func_name in correlation_metrics:
                if metric_name not in self.metrics:
                    continue
                if lacks_data(subset_data):
                    coeff, p_value = np.nan, np.nan
                else:
                    with warnings.catch_warnings():  # suppress scipy warnings
                        warnings.filterwarnings('ignore')
                        coeff, p_value = getattr(metrics, func_name)(
                            subset_data[self.reference_name].values,
                            subset_data[self.candidate_name].values)
                results.at[metric_name, '%s' % group] = coeff
                results.at[p_name, '%s' % group] = p_value

        return results
Example #16
0
def corr(paths,
         corr_df,
         start_date,
         end_date,
         lon=None,
         lat=None,
         vi_str='NDVI',
         time_lags=None,
         plot_time_lags=False):
    """ Calculate Spearman's Rho and p-value for SWI (all t-values) and
    specified VI (default NDVI).
    If plot_time_lags is True, a plot of VI (for different time lags) over
    SWI (all t-values) is created.

    Parameters:
    -----------
    paths : dict
        Paths to datasets; the keys 'SWI' and `vi_str` are read
    corr_df : pd.DataFrame
        DataFrame where correlation coeff.s are stored. It is modified in
        place; rows are addressed by comparing its index with the time lag.
    start_date, end_date : datetime
        Start and end date
    lon, lat : optional
        Location, passed on to read_ts and plot_alltogether
    vi_str : str, optional
        Vegetation index to use, default: NDVI
    time_lags : list of int, optional
        Time lags (days) by which the VI is shifted,
        default: [0, 10, 20, 30, 40, 50, 60, 100]
    plot_time_lags : bool, optional
        Plot (shifted) VI over SWIs, default: False. The plots are only
        produced when 0 is one of the time lags.

    Returns:
    --------
    corr_df : pd.DataFrame
        DataFrame containing the correlation coeff.s in the columns
        '<vi_str>_<SWI name>_rho' and '<vi_str>_<SWI name>_p'
    """
    # None instead of a mutable list as default argument.
    if time_lags is None:
        time_lags = [0, 10, 20, 30, 40, 50, 60, 100]

    swi_path = paths['SWI']
    vi_path = paths[vi_str]

    # read SWI for different T-values and VI
    swi_list = [
        'SWI_001', 'SWI_010', 'SWI_020', 'SWI_040', 'SWI_060', 'SWI_100'
    ]
    swi_df = read_ts(swi_path,
                     lon=lon,
                     lat=lat,
                     params=swi_list,
                     start_date=start_date,
                     end_date=end_date)
    vi_raw = read_ts(vi_path,
                     lon=lon,
                     lat=lat,
                     params=vi_str,
                     start_date=start_date,
                     end_date=end_date)
    # Rows in which any value equals 255 get NaN in the VI column. This
    # selects the same rows as the former np.where(...)[0] lookup, but
    # without chained assignment and positional indexing.
    is_255 = (vi_raw == 255).any(axis=1)
    vi_raw[vi_str] = vi_raw[vi_str].mask(is_255)

    # rescale SWI before further processing using method from Peng et al.,
    # 2014
    water = {}
    for swi_key in swi_list:
        swi_series = swi_df[swi_key]
        water[swi_key] = rescale_peng(swi_series,
                                      np.nanmin(swi_series),
                                      np.nanmax(swi_series))

    vi_resc = rescale_peng(vi_raw, np.nanmin(vi_raw), np.nanmax(vi_raw))

    for time_lag in time_lags:
        # insert time lag
        if time_lag > 0:
            shifted_index = vi_resc.index + timedelta(days=time_lag)
            vi = pd.DataFrame(vi_resc.copy().values,
                              columns=[vi_str],
                              index=shifted_index)
        else:
            # No shift: use the VI as read (not rescaled), as before.
            # Previously a lag of 0 that followed a positive lag silently
            # reused the shifted frame of the preceding iteration.
            # NOTE(review): Spearman's rho is rank based, so the missing
            # rescale should not matter - confirm rescale_peng is monotonic.
            vi = vi_raw

        # plot vi time lags over SWI
        if plot_time_lags and time_lag == 0:
            plot_alltogether(0, lon, lat, swi_df, vi.copy(), save_fig=True)
            for plot_lag in (10, 20, 30, 40, 50, 60, 100):
                vi_lagged = pd.DataFrame(
                    vi.values,
                    columns=['vi%d' % plot_lag],
                    index=vi.index + timedelta(days=plot_lag))
                plot_alltogether(plot_lag, lon, lat, swi_df, vi_lagged,
                                 save_fig=True)

        # calculate Spearman's Rho and p-value for VI and SWIs
        for ds_water in sorted(water):
            data_together = temp_match.matching(water[ds_water], vi)
            rho, p = metrics.spearmanr(data_together[ds_water],
                                       data_together[vi_str])
            # mask values with p-value > 0.05
            if p > 0.05:
                rho = np.nan

            rho_col = vi_str + '_' + ds_water + '_rho'
            p_col = vi_str + '_' + ds_water + '_p'
            if rho_col in corr_df.columns:
                # single .loc assignment instead of chained
                # corr_df[col].iloc[...] = value
                at_lag = corr_df.index == time_lag
                corr_df.loc[at_lag, rho_col] = rho
                corr_df.loc[at_lag, p_col] = p
            else:
                corr_df[rho_col] = pd.Series(rho, index=[time_lag])
                corr_df[p_col] = pd.Series(p, index=[time_lag])

    return corr_df