def test_seasonal_test(NoTrendData, TrendData, arbitrary_1d_data):
    """Check ``mk.seasonal_test`` (period 12) against pinned reference values.

    Three fixtures are exercised in turn: a series expected to report
    'no trend', a series expected to report 'increasing', and an arbitrary
    1-D series expected to report 'decreasing'. All fields are compared
    exactly except the slope of the increasing series, which is compared
    with a 1% relative tolerance.
    """
    # Series without a trend.
    flat = mk.seasonal_test(NoTrendData, period=12)
    flat_expected = (
        ('trend', 'no trend'),
        ('h', False),
        ('p', 1.0),
        ('z', 0),
        ('Tau', 0.0),
        ('s', 0.0),
        ('slope', 0.0),
    )
    for field, value in flat_expected:
        assert getattr(flat, field) == value

    # Series with a trend.
    rising = mk.seasonal_test(TrendData, period=12)
    rising_expected = (
        ('trend', 'increasing'),
        ('h', True),
        ('p', 0.0),
        ('Tau', 1.0),
        ('s', 5220.0),
    )
    for field, value in rising_expected:
        assert getattr(rising, field) == value
    np.testing.assert_allclose(rising.slope, 12, rtol=1e-02)

    # Arbitrary series: regression values pinned to full float precision.
    arbitrary = mk.seasonal_test(arbitrary_1d_data, period=12)
    arbitrary_expected = (
        ('trend', 'decreasing'),
        ('h', True),
        ('p', 0.03263834596177739),
        ('z', -2.136504114534638),
        ('Tau', -0.0794979079497908),
        ('s', -399.0),
        ('var_s', 34702.333333333336),
        ('slope', -0.16666666666666666),
    )
    for field, value in arbitrary_expected:
        assert getattr(arbitrary, field) == value
 def mkTest(series, seasonal):
     """Run a Mann-Kendall test on ``series`` and report whether a trend exists.

     When ``seasonal == False`` the original test is used, otherwise the
     seasonal test with a period of 12. A 'decreasing' or 'increasing'
     outcome is collapsed to 'present'; any other outcome is passed through
     unchanged. The resulting label is stored on ``self.__trend__`` and
     returned.

     NOTE(review): ``self`` is assigned to here but is not a parameter of
     this function, so it has to resolve from an enclosing scope - confirm
     whether ``self`` is missing from the signature.
     """
     if seasonal == False:
         outcome = mk.original_test(series)
     else:
         outcome = mk.seasonal_test(series, period= 12)

     # The first field of the result is the trend label.
     label = outcome[0]
     if label in ('decreasing', 'increasing'):
         label = 'present'

     self.__trend__ = label
     return label
Beispiel #3
0
 def add_element(self, value):
     '''
     Add a new element to the statistic and, when due, test for a trend.

     The value is appended to ``self.instance_memory``. Once
     ``self.min_instances`` values are stored, the Mann-Kendall variant
     selected by ``self.test_type`` is run once, and afterwards again every
     ``self.instances_step`` further values. After each test the p-value,
     Sen's slope and trend label are stored on the instance, and
     ``self.in_concept_change`` is set to True when the p-value is below
     ``self.alpha`` and the absolute slope exceeds ``self.slope_threshold``.

     :param value: observation to append to the instance memory
     :return: None
     :raises ValueError: if a test is due and ``self.test_type`` is not one
                         of the supported test names
     '''

     # Reset parameters if a change was detected on the previous call.
     if self.in_concept_change:
         self.reset()

     self.instance_memory.append(value)
     n_stored = len(self.instance_memory)

     # Reaching min_instances arms the one-off first test ...
     if n_stored == self.min_instances:
         self.sample_count = 1

     # ... and every later element counts towards the next periodic test.
     if n_stored > self.min_instances:
         self.instance_count += 1

     test_due = n_stored >= self.min_instances and (
         self.instance_count == self.instances_step
         or self.sample_count == 1)
     if not test_due:
         return

     # Call the corresponding test from the package.
     if self.test_type == 'original_mk':
         print('Perform MK test')
         results = mk.original_test(self.instance_memory, self.alpha)
         print('MK test ended')
     elif self.test_type == 'hamed_rao_mod':
         results = mk.hamed_rao_modification_test(
             self.instance_memory, self.alpha)
     elif self.test_type == 'yue_wang_mod':
         results = mk.yue_wang_modification_test(
             self.instance_memory, self.alpha)
     elif self.test_type == 'trend_free_pre_whitening_mod':
         results = mk.trend_free_pre_whitening_modification_test(
             self.instance_memory, self.alpha)
     elif self.test_type == 'pre_whitening_mod':
         results = mk.pre_whitening_modification_test(
             self.instance_memory, self.alpha)
     elif self.test_type == 'seasonal':
         results = mk.seasonal_test(
             self.instance_memory, period=self.period, alpha=self.alpha)
     else:
         # Previously this fell through to an UnboundLocalError below.
         raise ValueError(
             'Unknown test_type: {!r}'.format(self.test_type))

     # Reset counters every time a test was performed.
     self.sample_count = 0
     self.instance_count = 0

     # Read the result fields by name: positional access from the end
     # picks up the intercept instead of the slope when the result tuple
     # ends with an ``intercept`` field.
     self.p_value = results.p
     self.sens_slope = results.slope
     self.trend = results.trend

     self.in_concept_change = bool(
         self.p_value < self.alpha
         and np.abs(self.sens_slope) > self.slope_threshold)
#%% daily mean
# Daily and monthly means of the Shadipur O3 series, skipping its first 32 rows.
daily_mean = Shadipur_Del.O3[32:].resample('D').mean()
monthly_mean = Shadipur_Del.O3[32:].resample('M').mean()
#%% insert the daily mean values from CPCB
# Only the first two CSV columns are read; the code below accesses them as
# 'date' and 'O3'.
daily_mean_new = pd.read_csv(
    '/home/lp555/Delhi/Observations/Delhi_Shadipur_daily_2011_to_2019_edited.csv',
    usecols=[0, 1])
daily_mean_new.date = pd.to_datetime(daily_mean_new.date, dayfirst=True)
daily_mean_new.set_index('date', inplace=True)
# Zeros are turned into NaN so they do not enter the means below
# (presumably zeros mark missing readings - confirm).
daily_mean_new.replace(0, np.nan, inplace=True)
monthly_mean_of_dm = daily_mean_new.resample('M').mean()
#%%
# Plot from the 37th monthly value onwards (presumably dropping 2011-2013,
# given the file name and the axis label - confirm).
ax = monthly_mean_of_dm[36:].plot(figsize=(9, 3), color='k', marker='o')
ax.set_ylabel('monthly mean of daily mean Shadipur Delhi 2014 to 2019')
#%% SMK
# Seasonal Mann-Kendall test with a 12-value period. The result is not
# stored, so it is only visible when the cell is run interactively.
mk.seasonal_test(monthly_mean_of_dm[36:], period=12)
#%%
# Bare expression: shows the column in an interactive cell, no other effect.
monthly_mean_of_dm["O3"]
mmdm = np.array(monthly_mean_of_dm["O3"])
# Keep the first 96 monthly values so they reshape into 8 rows of 12.
mmdm = mmdm[:96]
#%%
mmdm_reshape = np.reshape(mmdm, (8, 12))
# NaN-ignoring mean over the 8 rows for each of the 12 columns
# (assumes the series starts in January so columns are calendar months - TODO confirm).
mmdm_month_mean = np.nanmean(mmdm_reshape, axis=0)
plt.plot(mmdm_month_mean)
#%%
# Same layout, NaN-ignoring standard deviation, drawn as error bars over 1..12.
mmdm_month_std = np.nanstd(mmdm_reshape, axis=0)
plt.errorbar(np.arange(1, 13, 1), mmdm_month_mean, yerr=mmdm_month_std)
#%% creating a linear fit of the monthly mean data - first step converting pandas dataframe to a numpy array
#from scipy import stats
#mmdm1 = np.array(monthly_mean_of_dm["O3"])
#x = monthly_mean_of_dm.index
# Order the raw table by 'Date', index it by that column and drop the
# 'pollutant' column before scaling.
df_O3.sort_values(by=['Date'], inplace = True)
beijing_O3 = df_O3.set_index('Date')
del beijing_O3['pollutant']
beijing_O3 = beijing_O3/2 # to convert ug/m3 to ppb
#%%
# Daily mean of the 'WL' column (presumably the Wanliu station named in the
# cell title further down - confirm).
wl_O3 = beijing_O3.WL
wl_O3 = pd.DataFrame(wl_O3)
wl_O3_dm = wl_O3.resample('D').mean()
wl_O3_dm.plot()
#%%
#beijing_O3.to_csv('/home/lp555/Beijing/Observations/Sinaapp/data/Beijing_O3_2014_to_2019/beijing_O3_all.csv')
#%% Wanliu Monthly mean of Daily mean
wl_O3_mmdm = wl_O3_dm.resample('M').mean()
wl_O3_mmdm.plot(marker = 'o')
#%%
# Seasonal Mann-Kendall test with a 12-value period on the monthly series,
# skipping its first 9 values.
mk_result_O3 = mk.seasonal_test(wl_O3_mmdm[9:], period = 12) # ignored the year 2014 or the increasing trend would be more significant
print(mk_result_O3)
#%%
#def station_dm(df_station):
    #df_station_dm = df_station.resample('D').mean() # convert to df before this
    #print(df_station_dm)
# Daily means for every column, then split into station groups. The .loc
# column slices are label-based and include both end labels, so the groups
# depend on the column order of beijing_O3.
beijing_O3_dm = beijing_O3.resample('D').mean()
urban_dm = beijing_O3_dm.loc[:, 'DS':'GC']
suburban_dm = beijing_O3_dm.loc[: , 'FS':'YQ']
# Single column wrapped in a DataFrame.
dl_dm = pd.DataFrame(beijing_O3_dm.loc[:, 'DL'])
regional_bg_dm = beijing_O3_dm.loc[:, 'BDL':'LLH']
traffic_dm = beijing_O3_dm.loc[:, 'QM':'DSH']
#beijing_O3_dm.plot(subplots = True, layout=(5,7), figsize = (6,9))
#%% see if need to set the axes with same values? BELOW dm for different types of stations
#fig, axes = plt.subplots(nrows = 4, ncols = 3)
#plt.title('Urban Stations Daily Mean', fontsize = 16)
Beispiel #6
0
"""
Created on Thu Jan 16 10:31:07 2020

@author: lp555
"""

#%%
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pymannkendall as mk
import statsmodels.api as sm
import matplotlib.dates as mdates
from matplotlib.dates import DateFormatter
#%% read data - deal with missing data and convert data type to float
Anand_Del = pd.read_csv('/home/lp555/Delhi/Observations/AnandVihar_hourly_2015_2019_edited.csv')
Anand_Del.columns = ['date', 'O3']
Anand_Del.date = pd.to_datetime(Anand_Del.date, dayfirst = True)
Anand_Del.set_index('date', inplace = True)
# The string "None" is replaced with NaN so the column can be cast to float.
Anand_Del.replace(to_replace = ["None"], value = np.nan, inplace = True)
# NOTE(review): the division by 2 looks like a unit conversion - confirm and
# document the units.
Anand_Del.O3 = Anand_Del.O3.astype(float)/2
#%% resample to daily mean - MDA 8 may not be useful
daily_mean = Anand_Del.O3.resample('D').mean()
daily_mean = pd.DataFrame(daily_mean)
#%% resample to monthly mean of daily mean
mmdm = daily_mean.O3.resample('M').mean()
ax = mmdm.plot(figsize=(9,3), color = 'k', marker = 'o')
ax.set_ylabel('monthly mean of daily mean Anand Delhi 2015 to 2019')
#%% Seasonal Mann Kendall (SMK) test on the trend (ozone season)
# NOTE(review): mmdm holds one value per month, yet the test is run with
# period = 4 - confirm this is intended rather than period = 12.
# The result is not stored, so it is only visible when run interactively.
mk.seasonal_test(mmdm, period = 4)
#%%
Beispiel #7
0
def par_trend(n, input_param):
    """
    Compute the seasonal Mann-Kendall trend for one chunk of pixels and save it.

    The chunk is skipped entirely when its output file already exists.

    :param n:               int: parallel index
    :param input_param:     type(dict):
                            {'dt':          time-date array
                             'path2data':   path where to read data series for which calculating the trend
                             'path2slopes': path where to save the calculated trend infos
                             'data_mask':   mask array, set the index of valid pixels (not-nan)
                             'head':        head defining the name of temporary data to read
                             'step':        used to define the index of the matrix-chunk relative to the current loop
                             'nloops':      parallel loop index number
                             'dbg':         enables debug mode
                             'fid':         object of kind open(filename) points to the log_file where to write
                                            debug output
                             'frequency':   frequency of the timeseries, i.e. how many observations per year
                             'threshold':   significance level passed to the seasonal test as ``alpha``}
    :return:                None

        Save a npy temporary file into the save_path directory for each loop of parallel cycle.
        The file holds the stacked arrays [slopes, intercepts, p-values]; pixels that are
        masked out keep NaN in all three.
    """
    dt = input_param['dt']
    d_path = input_param['path2data']
    s_path = input_param['path2slopes']
    head = input_param['head']
    wm = input_param['data_mask']
    step = input_param['step']
    nloops = input_param['nloops']
    dbg = input_param['dbg']
    fid = input_param['fid']
    frequency = input_param['frequency']
    threshold = input_param['threshold']

    if dbg:
        fid.write('start parallel loop index ' + str(n) + '\n')

    # Input and output chunks share the same file name in different folders.
    chunk_name = head + '-' + str(n).zfill(2) + '.npy'
    sl_name = s_path + chunk_name
    if os.path.exists(sl_name):
        # Chunk already processed.
        return

    i0 = step * n
    # The last chunk takes whatever remains of the mask.
    i1 = None if n + 1 == nloops else (n + 1) * step

    # reading temporary data chunk as saved in previous step
    if dbg:
        fid.write('reading chunk ' + str(n) + '\n')
    data = np.load(d_path + chunk_name)
    wm = wm[i0:i1]
    ind_good = np.where(wm != 0)

    # Force a float dtype: inheriting the dtype of an integer or boolean
    # mask would make it impossible to store NaN or fractional results.
    slopes = np.full_like(wm, np.nan, dtype=float)
    interc = np.full_like(wm, np.nan, dtype=float)
    pvalue = np.full_like(wm, np.nan, dtype=float)

    # The time index is identical for every pixel, so build it once.
    time_index = pd.to_datetime(dt)

    for k in ind_good[0]:
        ts = pd.Series(data[k, :], index=time_index)

        trend_out = mk.seasonal_test(ts, period=frequency, alpha=threshold)

        slopes[k] = trend_out.slope / frequency
        interc[k] = trend_out.intercept
        pvalue[k] = trend_out.p

    np.save(sl_name, [slopes, interc, pvalue])

    if dbg:
        fid.write('end parallel loop index ' + str(n) + '\n')
Beispiel #8
0
    ]
    labs = [
        'Ice shelf stations, modelled', 'Inlet stations, modelled', 'SAM index'
    ]
    lgd = ax.legend(lns, labs, bbox_to_anchor=(0.55, 1.1), loc=2, fontsize=20)
    frame = lgd.get_frame()
    frame.set_facecolor('white')
    for ln in lgd.get_texts():
        plt.setp(ln, color='dimgrey')
    lgd.get_frame().set_linewidth(0.0)
    plt.subplots_adjust(left=0.24, right=0.85)
    plt.savefig('Total_time_srs_modelled_foehn_frequency_vs_SAM.png',
                transparent=True)
    plt.savefig('Total_time_srs_modelled_foehn_frequency_vs_SAM.eps',
                transparent=True)
    plt.show()


# Draw and save the foehn-frequency figure.
foehn_time_srs()

# One line per season; each series drops its last two values and is divided
# by 7.3. NOTE(review): 7.3 is an unexplained scaling constant - document it.
DJF = plt.plot(iceshelf_mod.DJF.values[:-2] / 7.3, label='DJF')
MAM = plt.plot(iceshelf_mod.MAM.values[:-2] / 7.3, label='MAM')
JJA = plt.plot(iceshelf_mod.JJA.values[:-2] / 7.3, label='JJA')
SON = plt.plot(iceshelf_mod.SON.values[:-2] / 7.3, label='SON')

import pymannkendall as mk
# No period is passed, so the library default applies. The result is not
# used in the code visible here.
result = mk.seasonal_test(inlet_mod.SON)
plt.plot(df.index, df.IWP, label='IWP')
# NOTE(review): the line is labelled 'yearly rolling mean' but the window is
# 7305 samples - confirm the window matches one year at df's sampling rate.
yr_mn = df.IWP.rolling(window=7305, center=True).mean()
plt.plot(yr_mn, label='yearly rolling mean')
plt.show()
Beispiel #9
0
# Mean of the WVP field over the index window [40:140, 85:155] for every
# step along the first axis.
WVP_masked = np.mean(all_vars['WVP'].data[:, 40:140, 85:155], axis=(1, 2))

# One column per variable, each truncated to the 7305 days of the daily
# index built below (1998-01-01 to 2017-12-31).
df = pd.DataFrame([
    pd.Series(cl_masked[:7305]),
    pd.Series(WVP_masked[:7305]),
    pd.Series(LWP_masked[:7305]),
    pd.Series(IWP_masked[:7305])
],
                  index=['cl', 'WVP', 'LWP', 'IWP'])
df = df.transpose()
df['datetime'] = pd.date_range(datetime(1998, 1, 1, 0, 0, 0),
                               datetime(2017, 12, 31, 23, 59, 59),
                               freq='D')
df = df.set_index('datetime')

import pymannkendall as mk
# NOTE(review): no period is passed, so the library default applies even
# though df is daily - confirm the intended seasonal period. The result is
# not used in the code visible here.
result = mk.seasonal_test(df.cl)
plt.plot(df.index, df.IWP, label='IWP')
# A 365-day centred window gives the yearly rolling mean named in the label.
# The previous window of 7305 equalled the full length of the series, which
# leaves at most a single non-NaN value and therefore no visible line.
yr_mn = df.IWP.rolling(window=365, center=True).mean()
plt.plot(yr_mn, label='yearly rolling mean')
plt.show()


def calc_percent_cloudy(cl_masked):
    ''' Calculate percentage of time where ice-shelf integrated cloud fraction falls into one of the following
    categories, as defined in Kay et al. (2008) doi: 10.1029/2011RG000363:

        1. 'Clear': cloud fraction < 0.31
        2. 'Scattered cloud': 0.31 < cloud fraction > 0.75
        3. ' Broken cloud': 0.75 < cloud fraction > 1.0
        4. 'Overcast': cloud fraction == 1.0