Example 1

import numpy as np

# u_darrays (per-point trend helpers such as linear_trend_mk) and us
# (statistics helpers such as fdr_threshold) are project-specific modules,
# not part of a public library.
import u_darrays
import us


def array_juggling(data, month, hour=None):
    # data is expected to be an xarray DataArray with a 'time' dimension and
    # 'latitude'/'longitude' coordinates. Note that the month argument is not
    # used here: the filter below hard-codes the March-May season (months 3-5).
    if hour is not None:
        data = data[((data['time.month'] >= 3) & (data['time.month'] <= 5))
                    & (data['time.hour'] == hour)]
    else:
        data = data[(data['time.month'] >= 3) & (data['time.month'] <= 5)]

    # annual means of the selected subset
    data_years = data.groupby('time.year').mean('time')

    # stack lat and lon into a single dimension called allpoints
    datastacked = data_years.stack(allpoints=['latitude', 'longitude'])

    # apply the function over allpoints to calculate the trend at each point
    print('Entering trend calc')
    dtrend = datastacked.groupby('allpoints').apply(
        u_darrays.linear_trend_mk, alpha=0.05, eps=0.0001)

    ddtrend = dtrend['slope']
    try:
        # false-discovery-rate threshold: mask points whose p-value exceeds it
        pthresh = us.fdr_threshold(
            dtrend['pval'].values[np.isfinite(dtrend['pval'].values)],
            alpha=0.05)
        ddtrend.values[(dtrend['pval'].values > pthresh)
                       | np.isnan(dtrend['pval'].values)] = np.nan
    except ValueError:
        # if the threshold cannot be computed (e.g. no valid p-values), mask everything
        ddtrend.values = ddtrend.values * np.nan
        pthresh = np.nan
    print('p value threshold', pthresh)

    # unstack back to lat lon coordinates
    return ddtrend.unstack('allpoints'), data_years
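
The per-point trend helpers from u_darrays are not shown in these examples. As a rough illustration of the shape such a helper needs for the stack / groupby('allpoints').apply / unstack pattern above, here is a minimal sketch based on scipy.stats.linregress; the function name is hypothetical, and the returned variables ('slope', 'pval') and the nb_missing argument simply mirror how the examples use the real helpers.

import numpy as np
import xarray as xr
from scipy import stats


def linear_trend_sketch(point, nb_missing=10):
    # point: 1-D DataArray for one stacked grid point, with a 'year' dimension
    y = point.values.astype(float)
    x = point['year'].values.astype(float)
    good = np.isfinite(y)
    if (~good).sum() > nb_missing:
        # too many missing years: return NaNs so this point is masked later
        return xr.Dataset({'slope': xr.DataArray(np.nan),
                           'pval': xr.DataArray(np.nan)})
    res = stats.linregress(x[good], y[good])
    return xr.Dataset({'slope': xr.DataArray(res.slope),
                       'pval': xr.DataArray(res.pvalue)})

Applied as datastacked.groupby('allpoints').apply(linear_trend_sketch), this yields one 'slope' and one 'pval' value per stacked point, which unstack('allpoints') turns back into lat/lon maps.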

Example 2

def calc_trend(data, month, hour=None, method=None, sig=False, wilks=False):

    y0 = 1980
    if method is None:
        raise ValueError(
            'Please provide trend calc method: polyfit or mk (mann kendall)')

    # month is a list: [m] for a single month, [first, last] for a season
    if hour is not None:
        if len(month) > 1:
            data = data[((data['time.month'] >= month[0])
                         & (data['time.month'] <= month[1]))
                        & (data['time.hour'] == hour)
                        & (data['time.year'] >= y0)
                        & (data['time.year'] <= 2018)]
        else:
            data = data[(data['time.month'] == month[0])
                        & (data['time.hour'] == hour)
                        & (data['time.year'] >= y0)
                        & (data['time.year'] <= 2018)]
    else:
        if len(month) > 1:
            data = data[((data['time.month'] >= month[0])
                         & (data['time.month'] <= month[1]))
                        & (data['time.year'] >= y0)
                        & (data['time.year'] <= 2018)]
        else:
            data = data[(data['time.month'] == month[0])
                        & (data['time.year'] >= y0)
                        & (data['time.year'] <= 2018)]

    if len(data.time) == 0:
        print('Data does not seem to have picked month or hour. Please check input data')


    mean_years = data.groupby('time.year').mean(dim='time').squeeze().load()

    # stack lat and lon into a single dimension called allpoints
    datastacked = mean_years.stack(allpoints=['lat', 'lon'])

    # apply the function over allpoints to calculate the trend at each point
    print('Entering trend calc')

    alpha = 0.05
    # NaN means there is not enough data; slope = 0 means there is no significant trend.
    if method == 'mk':
        dtrend = datastacked.groupby('allpoints').apply(
            u_darrays.linear_trend_mk, alpha=alpha, eps=0.0001, nb_missing=10)
        dtrend = dtrend.unstack('allpoints')
        if sig:
            dtrend['slope'].values[dtrend['ind'].values == 0] = 0

    # NaN means there is not enough data; slope = 0 means there is no significant trend.
    if method == 'polyfit':
        dtrend = datastacked.groupby('allpoints').apply(
            u_darrays.linear_trend_lingress, nb_missing=10)
        dtrend = dtrend.unstack('allpoints')

        if sig:
            dtrend['slope'].values[dtrend['pval'].values > alpha] = 0

    ddtrend = dtrend['slope']

    if wilks and sig:
        try:
            pthresh = us.fdr_threshold(dtrend['pval'].values[np.isfinite(dtrend['pval'].values)], alpha=alpha)
            ddtrend.values[(dtrend['pval'].values > pthresh) | np.isnan(dtrend['pval'].values)] = np.nan
        except ValueError:
            ddtrend.values = ddtrend.values * np.nan
            pthresh = np.nan
        print('p value threshold', pthresh)

    # unstack back to lat lon coordinates
    return ddtrend, mean_years
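
The us.fdr_threshold helper used for the wilks option is also project-specific. A minimal sketch of a false-discovery-rate p-value threshold in the spirit of the Wilks / Benjamini-Hochberg procedure could look as follows; the name and signature here are assumptions chosen to match the call above, not the actual API.

import numpy as np


def fdr_threshold_sketch(pvals, alpha=0.05):
    # largest p-value that still satisfies the FDR criterion
    # p_(i) <= (i / N) * alpha, with p-values sorted in ascending order
    p = np.sort(np.asarray(pvals, dtype=float))
    crit = alpha * np.arange(1, p.size + 1) / p.size
    passing = p[p <= crit]
    # .max() on an empty array raises ValueError, which would explain the
    # try/except ValueError around the call in calc_trend
    return passing.max()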
Example 3

def calc_trend(data, month, hour=None, method=None, sig=False, wilks=False):

    if method is None:
        raise ValueError(
            'Please provide trend calc method: polyfit or mk (mann kendall)')

    if hour is not None:
        if len(month) > 1:
            # Note: this version uses | for the month window: month=[a, b]
            # only restricts the season when it wraps around the new year,
            # e.g. month=[11, 2] picks Nov-Feb; [3, 5] would select all months.
            data = data[((data['time.month'] >= month[0])
                         | (data['time.month'] <= month[1]))
                        & (data['time.hour'] == hour)
                        & (data['time.year'] >= 1983)
                        & (data['time.year'] <= 2017)]
        else:
            data = data[(data['time.month'] == month[0])
                        & (data['time.hour'] == hour)
                        & (data['time.year'] >= 1983)
                        & (data['time.year'] <= 2017)]
    else:
        if len(month) > 1:
            data = data[((data['time.month'] >= month[0])
                         | (data['time.month'] <= month[1]))
                        & (data['time.year'] >= 1983)
                        & (data['time.year'] <= 2017)]
        else:
            data = data[(data['time.month'] == month[0])
                        & (data['time.year'] >= 1983)
                        & (data['time.year'] <= 2017)]

    if len(data.time) == 0:
        print('Data does not seem to have picked month or hour. Please check input data')


    mean_years = data.groupby('time.year').mean('time')

    # stack lat and lon into a single dimension called allpoints
    datastacked = mean_years.stack(allpoints=['latitude', 'longitude'])

    # apply the function over allpoints to calculate the trend at each point
    print('Entering trend calc')

    alpha = 0.05
    # NaN means there is not enough data; slope = 0 means there is no significant trend.
    if method == 'mk':
        dtrend = datastacked.groupby('allpoints').apply(
            u_darrays.linear_trend_mk, alpha=alpha, eps=0.01, nb_missing=5)
        dtrend = dtrend.unstack('allpoints')
        if sig:
            dtrend['slope'].values[dtrend['ind'].values == 0] = 0

    # NaN means there is not enough data; slope = 0 means there is no significant trend.
    if method == 'polyfit':
        dtrend = datastacked.groupby('allpoints').apply(
            u_darrays.linear_trend_lingress, nb_missing=10)
        dtrend = dtrend.unstack('allpoints')

        if sig:
            dtrend['slope'].values[dtrend['pval'].values > alpha] = 0

    ddtrend = dtrend['slope']

    if wilks and sig:
        try:
            pthresh = us.fdr_threshold(dtrend['pval'].values[np.isfinite(dtrend['pval'].values)], alpha=alpha)
            ddtrend.values[(dtrend['pval'].values > pthresh) | np.isnan(dtrend['pval'].values)] = np.nan
        except ValueError:
            ddtrend.values = ddtrend.values * np.nan
            pthresh = np.nan
        print('p value threshold', pthresh)

    # unstack back to lat lon coordinates
    return ddtrend, mean_years
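
A hypothetical call to this variant, assuming the input is an hourly xarray DataArray on ('time', 'latitude', 'longitude'); the file name and variable are placeholders only.

import xarray as xr

da = xr.open_dataset('t2m_hourly.nc')['t2m']  # hypothetical input

# November-February season at 12 UTC (the | month filter above wraps around
# the new year), Mann-Kendall trend with significance and FDR masking
trend, clim = calc_trend(da, [11, 2], hour=12, method='mk',
                         sig=True, wilks=True)
print(trend)  # slope per grid point: 0 where not significant, NaN where masked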
Example 4

def calc_trend(data, month, hour=None, method=None, sig=False, wilks=False):

    if method is None:
        raise ValueError(
            'Please provide trend calc method: polyfit or mk (mann kendall)')
    if hour is not None:

        if len(month) > 1:

            data = data[((data['time.month'] >= month[0])
                         & (data['time.month'] <= month[1]))
                        & (data['time.hour'] == hour) &
                        (data['time.year'] >= 1983) &
                        (data['time.year'] <= 2017)]
        else:

            data = data[(data['time.month'] == month[0])
                        & (data['time.hour'] == hour) &
                        (data['time.year'] >= 1983) &
                        (data['time.year'] <= 2017)]
    else:
        if len(month) > 1:
            data = data[((data['time.month'] >= month[0])
                         & (data['time.month'] <= month[1]))
                        & (data['time.year'] >= 1983) &
                        (data['time.year'] <= 2017)]
        else:
            data = data[(data['time.month'] == month[0])
                        & (data['time.year'] >= 1983) &
                        (data['time.year'] <= 2017)]

    if len(data.time) == 0:
        print(
            'Data does not seem to have picked month or hour. Please check input data'
        )

    mean_years = data.groupby('time.year').mean('time')

    highpos = 0

    # If the variable is 'z' (presumably geopotential): average the annual
    # means over 11-year blocks and record the (lon, lat) position of the
    # maximum in each block.
    if mean_years.name == 'z':
        highpos = []
        three = mean_years.coarsen(year=11, boundary='trim').mean()
        for my in three:
            pos = np.unravel_index(np.argmax(my.values), my.values.shape)
            mlat = my.latitude.values[pos[0]]
            mlon = my.longitude.values[pos[1]]
            highpos.append((mlon, mlat))

    # If the variable is 'u200' (presumably 200 hPa zonal wind): per 11-year
    # block, find the latitude index of the minimum absolute wind at each
    # longitude.
    if mean_years.name == 'u200':
        three = mean_years.coarsen(year=11, boundary='trim').mean()
        three.values = np.abs(three.values)
        highpos = three.argmin('latitude')

    # stack lat and lon into a single dimension called allpoints
    datastacked = mean_years.stack(allpoints=['latitude', 'longitude'])

    # apply the function over allpoints to calculate the trend at each point
    print('Entering trend calc')

    alpha = 0.05
    # NaN means there is not enough data; slope = 0 means there is no significant trend.
    if method == 'mk':
        dtrend = datastacked.groupby('allpoints').apply(
            u_darrays.linear_trend_mk, alpha=alpha, eps=0.01, nb_missing=10)
        dtrend = dtrend.unstack('allpoints')
        if sig:
            (dtrend['slope'].values)[dtrend['ind'].values == 0] = 0

    # NaN means there is not enough data; slope = 0 means there is no significant trend.
    if method == 'polyfit':
        dtrend = datastacked.groupby('allpoints').apply(
            u_darrays.linear_trend_lingress, nb_missing=10)
        dtrend = dtrend.unstack('allpoints')

        if sig:
            (dtrend['slope'].values)[dtrend['pval'].values > alpha] = 0

    ddtrend = dtrend['slope']

    if wilks and sig:
        try:
            pthresh = us.fdr_threshold(dtrend['pval'].values[np.isfinite(
                dtrend['pval'].values)],
                                       alpha=alpha)
            ddtrend.values[(dtrend['pval'].values > pthresh)
                           | np.isnan(dtrend['pval'].values)] = np.nan
        except ValueError:
            ddtrend.values = ddtrend.values * np.nan
            pthresh = np.nan
        print('p value threshold', pthresh)

    # unstack back to lat lon coordinates
    return ddtrend, mean_years, highpos
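
Finally, a rough sketch of what a Mann-Kendall-style per-point helper such as u_darrays.linear_trend_mk might look like, built here from scipy.stats.kendalltau (monotonic-trend p-value) and scipy.stats.theilslopes (robust slope estimate). The function name, the choice of test and the way alpha, eps and nb_missing are interpreted are assumptions; only the output variables 'slope', 'pval' and 'ind' mirror what calc_trend expects.

import numpy as np
import xarray as xr
from scipy import stats


def linear_trend_mk_sketch(point, alpha=0.05, eps=0.01, nb_missing=10):
    # point: 1-D DataArray for one stacked grid point, with a 'year' dimension
    y = point.values.astype(float)
    x = point['year'].values.astype(float)
    good = np.isfinite(y)
    if (~good).sum() > nb_missing:
        # too many missing years: NaN slope/p-value, ind = 0 (no usable trend)
        return xr.Dataset({'slope': xr.DataArray(np.nan),
                           'pval': xr.DataArray(np.nan),
                           'ind': xr.DataArray(0)})
    tau, pval = stats.kendalltau(x[good], y[good])
    slope = stats.theilslopes(y[good], x[good])[0]
    # flag a significant trend whose slope exceeds the tolerance eps
    ind = int((pval <= alpha) and (abs(slope) > eps))
    return xr.Dataset({'slope': xr.DataArray(slope),
                       'pval': xr.DataArray(pval),
                       'ind': xr.DataArray(ind)})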