def test_seasonal_test(NoTrendData, TrendData, arbitrary_1d_data):
    # check with no trend data
    NoTrendRes = mk.seasonal_test(NoTrendData, period=12)
    assert NoTrendRes.trend == 'no trend'
    assert NoTrendRes.h == False
    assert NoTrendRes.p == 1.0
    assert NoTrendRes.z == 0
    assert NoTrendRes.Tau == 0.0
    assert NoTrendRes.s == 0.0
    assert NoTrendRes.slope == 0.0

    # check with trendy data
    TrendRes = mk.seasonal_test(TrendData, period=12)
    assert TrendRes.trend == 'increasing'
    assert TrendRes.h == True
    assert TrendRes.p == 0.0
    assert TrendRes.Tau == 1.0
    assert TrendRes.s == 5220.0
    np.testing.assert_allclose(TrendRes.slope, 12, rtol=1e-02)

    # check with arbitrary data
    result = mk.seasonal_test(arbitrary_1d_data, period=12)
    assert result.trend == 'decreasing'
    assert result.h == True
    assert result.p == 0.03263834596177739
    assert result.z == -2.136504114534638
    assert result.Tau == -0.0794979079497908
    assert result.s == -399.0
    assert result.var_s == 34702.333333333336
    assert result.slope == -0.16666666666666666
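A minimal sketch of pytest fixtures that would satisfy the first two sets of expectations above (hypothetical; the project's real conftest.py may define them differently): a constant series yields 'no trend', while a strictly increasing series of 360 monthly values gives Tau == 1.0 and s == C(30, 2) * 12 == 5220 for period=12.

import numpy as np
import pytest
import pymannkendall as mk

@pytest.fixture
def NoTrendData():
    # constant series: s = 0, p = 1.0, slope = 0.0
    return np.ones(360)

@pytest.fixture
def TrendData():
    # strictly increasing series: each of the 12 seasons gains 12 per year,
    # so the seasonal Sen's slope is 12
    return np.arange(360)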
def mkTest(self, series, seasonal):
    # run the plain MK test, or the seasonal variant when requested
    if not seasonal:
        data_mk = mk.original_test(series)
        trend = data_mk[0]
    else:
        data_mk_seasonal_test = mk.seasonal_test(series, period=12)
        trend = data_mk_seasonal_test[0]

    # collapse 'increasing'/'decreasing' into a single 'present' label
    if trend in ('decreasing', 'increasing'):
        trend = 'present'
    self.__trend__ = trend
    return trend
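Hypothetical usage, assuming the method lives on a detector-style class that caches the last label in self.__trend__ (the class name below is illustrative only):

checker = TrendChecker()                          # hypothetical host class
label = checker.mkTest(monthly_series, seasonal=True)
print(label)                                      # 'present' or 'no trend'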
def add_element(self, value):
    '''
    Add a new element to the statistic
    '''
    # reset parameters if a change was detected:
    if self.in_concept_change:
        self.reset()

    # append element:
    self.instance_memory.append(value)

    if len(self.instance_memory) == self.min_instances:
        self.sample_count = 1
    if len(self.instance_memory) > self.min_instances:
        self.instance_count += 1

    # start drift detection: min_instances has to be reached first; the test then
    # runs once, and after that every i-th instance (instances_step)
    if len(self.instance_memory) >= self.min_instances and \
            (self.instance_count == self.instances_step or self.sample_count == 1):
        # call the corresponding test from the pymannkendall package:
        if self.test_type == 'original_mk':
            results_tuple = mk.original_test(self.instance_memory, self.alpha)
        elif self.test_type == 'hamed_rao_mod':
            results_tuple = mk.hamed_rao_modification_test(self.instance_memory, self.alpha)
        elif self.test_type == 'yue_wang_mod':
            results_tuple = mk.yue_wang_modification_test(self.instance_memory, self.alpha)
        elif self.test_type == 'trend_free_pre_whitening_mod':
            results_tuple = mk.trend_free_pre_whitening_modification_test(self.instance_memory, self.alpha)
        elif self.test_type == 'pre_whitening_mod':
            results_tuple = mk.pre_whitening_modification_test(self.instance_memory, self.alpha)
        elif self.test_type == 'seasonal':
            results_tuple = mk.seasonal_test(self.instance_memory, period=self.period, alpha=self.alpha)

        # reset counters every time a test was performed:
        self.sample_count = 0
        self.instance_count = 0

        # assign results:
        self.trend = results_tuple[0]
        self.p_value = results_tuple[2]
        self.sens_slope = results_tuple[-1]

        # flag a drift only if the trend is significant AND steep enough:
        if self.p_value < self.alpha and np.abs(self.sens_slope) > self.slope_threshold:
            self.in_concept_change = True
        else:
            self.in_concept_change = False
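Hypothetical usage of the detector sketched above, assuming its constructor exposes the attributes that add_element() references (the class name and constructor signature are assumptions):

import numpy as np

detector = MKDriftDetector(test_type='original_mk', min_instances=30,
                           instances_step=10, alpha=0.05, slope_threshold=0.0)
stream = np.random.randn(200)                 # arbitrary example stream
for value in stream:
    detector.add_element(value)
    if detector.in_concept_change:
        print('drift detected, slope =', detector.sens_slope)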
#%% daily mean
daily_mean = Shadipur_Del.O3[32:].resample('D').mean()
monthly_mean = Shadipur_Del.O3[32:].resample('M').mean()

#%% insert the daily mean values from CPCB
daily_mean_new = pd.read_csv(
    '/home/lp555/Delhi/Observations/Delhi_Shadipur_daily_2011_to_2019_edited.csv',
    usecols=[0, 1])
daily_mean_new.date = pd.to_datetime(daily_mean_new.date, dayfirst=True)
daily_mean_new.set_index('date', inplace=True)
daily_mean_new.replace(0, np.nan, inplace=True)
monthly_mean_of_dm = daily_mean_new.resample('M').mean()

#%%
ax = monthly_mean_of_dm[36:].plot(figsize=(9, 3), color='k', marker='o')
ax.set_ylabel('monthly mean of daily mean Shadipur Delhi 2014 to 2019')

#%% SMK
mk.seasonal_test(monthly_mean_of_dm[36:], period=12)

#%%
monthly_mean_of_dm["O3"]
mmdm = np.array(monthly_mean_of_dm["O3"])
mmdm = mmdm[:96]

#%% reshape into (years, months) and average over the years for the mean seasonal cycle
mmdm_reshape = np.reshape(mmdm, (8, 12))
mmdm_month_mean = np.nanmean(mmdm_reshape, axis=0)
plt.plot(mmdm_month_mean)

#%%
mmdm_month_std = np.nanstd(mmdm_reshape, axis=0)
plt.errorbar(np.arange(1, 13, 1), mmdm_month_mean, yerr=mmdm_month_std)

#%% creating a linear fit of the monthly mean data - first step: converting the pandas DataFrame to a numpy array
#from scipy import stats
#mmdm1 = np.array(monthly_mean_of_dm["O3"])
#x = monthly_mean_of_dm.index
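A hedged completion of the linear fit started in the commented-out cell above, assuming scipy.stats.linregress on an ordinal month axis (the variable names beyond mmdm1 are illustrative):

from scipy import stats

mmdm1 = np.array(monthly_mean_of_dm["O3"])
x = np.arange(len(mmdm1))                     # months as 0, 1, 2, ...
valid = ~np.isnan(mmdm1)                      # linregress cannot handle NaNs
fit = stats.linregress(x[valid], mmdm1[valid])
print(fit.slope, fit.intercept, fit.pvalue)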
df_O3.sort_values(by=['Date'], inplace=True)
beijing_O3 = df_O3.set_index('Date')
del beijing_O3['pollutant']
beijing_O3 = beijing_O3/2  # to convert ug/m3 to ppb

#%%
wl_O3 = beijing_O3.WL
wl_O3 = pd.DataFrame(wl_O3)
wl_O3_dm = wl_O3.resample('D').mean()
wl_O3_dm.plot()

#%%
#beijing_O3.to_csv('/home/lp555/Beijing/Observations/Sinaapp/data/Beijing_O3_2014_to_2019/beijing_O3_all.csv')

#%% Wanliu monthly mean of daily mean
wl_O3_mmdm = wl_O3_dm.resample('M').mean()
wl_O3_mmdm.plot(marker='o')

#%%
mk_result_O3 = mk.seasonal_test(wl_O3_mmdm[9:], period=12)  # ignore the year 2014, or the increasing trend would be even more significant
print(mk_result_O3)

#%% daily means per station type (a reusable helper is sketched after this cell)
#def station_dm(df_station):
#    df_station_dm = df_station.resample('D').mean()  # convert to df before this
#    print(df_station_dm)
beijing_O3_dm = beijing_O3.resample('D').mean()
urban_dm = beijing_O3_dm.loc[:, 'DS':'GC']
suburban_dm = beijing_O3_dm.loc[:, 'FS':'YQ']
dl_dm = pd.DataFrame(beijing_O3_dm.loc[:, 'DL'])
regional_bg_dm = beijing_O3_dm.loc[:, 'BDL':'LLH']
traffic_dm = beijing_O3_dm.loc[:, 'QM':'DSH']
#beijing_O3_dm.plot(subplots=True, layout=(5, 7), figsize=(6, 9))

#%% see if the axes need to share the same value range? Below: daily means for the different station types
#fig, axes = plt.subplots(nrows=4, ncols=3)
#plt.title('Urban Stations Daily Mean', fontsize=16)
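A hedged completion of the commented-out station_dm helper above; the assumed intent is to resample one station's DataFrame to daily means and return it rather than print it:

def station_dm(df_station):
    """Return daily means for one station (or a slice of stations)."""
    df_station_dm = df_station.resample('D').mean()
    return df_station_dm

# e.g. urban_dm = station_dm(beijing_O3.loc[:, 'DS':'GC'])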
""" Created on Thu Jan 16 10:31:07 2020 @author: lp555 """ #%% import numpy as np import pandas as pd import matplotlib.pyplot as plt import pymannkendall as mk import statsmodels.api as sm import matplotlib.dates as mdates from matplotlib.dates import DateFormatter #%% read data - deal with missing data and convert data type to float Anand_Del = pd.read_csv('/home/lp555/Delhi/Observations/AnandVihar_hourly_2015_2019_edited.csv') Anand_Del.columns = ['date', 'O3'] Anand_Del.date = pd.to_datetime(Anand_Del.date, dayfirst = True) Anand_Del.set_index('date', inplace = True) Anand_Del.replace(to_replace = ["None"], value = np.nan, inplace = True) Anand_Del.O3 = Anand_Del.O3.astype(float)/2 #%% resample to daily mean - MDA 8 may not be useful daily_mean = Anand_Del.O3.resample('D').mean() daily_mean = pd.DataFrame(daily_mean) #%% resample to monthly mean of daily mean mmdm = daily_mean.O3.resample('M').mean() ax = mmdm.plot(figsize=(9,3), color = 'k', marker = 'o') ax.set_ylabel('monthly mean of daily mean Anand Delhi 2015 to 2019') #%% Seasonal Mann Kendall (SMK) test on the trend (ozone season) mk.seasonal_test(mmdm, period = 4) #%%
def par_trend(n, input_param):
    """
    :param n: int: parallel index
    :param input_param: type(dict):
        {'dt': time-date array
         'path2data': path where to read the data series for which the trend is calculated
         'path2slopes': path where to save the calculated trend info
         'data_mask': mask array, sets the index of valid (not-nan) pixels
         'head': head defining the name of the temporary data to read
         'step': used to define the index of the matrix chunk relative to the current loop
         'nloops': parallel loop index number
         'dbg': enables debug mode
         'fid': object of kind open(filename), points to the log file where debug output is written
         'frequency': frequency of the timeseries, i.e. how many observations per year
         'threshold': significance level (alpha) passed to the seasonal MK test}
    :return: saves a npy temporary file into the save-path directory for each loop of the parallel cycle
    """
    dt = input_param['dt']
    d_path = input_param['path2data']
    s_path = input_param['path2slopes']
    head = input_param['head']
    wm = input_param['data_mask']
    step = input_param['step']
    nloops = input_param['nloops']
    dbg = input_param['dbg']
    fid = input_param['fid']
    frequency = input_param['frequency']
    threshold = input_param['threshold']

    if dbg:
        fid.writelines('start parallel loop index ' + str(n) + '\n')

    sl_name = s_path + head + '-' + str(n).zfill(2) + '.npy'
    if not os.path.exists(sl_name):
        i0 = step * n
        i1 = (n + 1) * step
        if n + 1 == nloops:
            i1 = None

        # reading the temporary data chunk as saved in the previous step
        if dbg:
            fid.writelines('reading chunk ' + str(n) + '\n')
        data = np.load(d_path + head + '-' + str(n).zfill(2) + '.npy')

        wm = wm[i0:i1]
        ind_good = np.where(wm != 0)
        slopes = np.full_like(wm, fill_value=np.nan)
        interc = np.full_like(wm, fill_value=np.nan)
        pvalue = np.full_like(wm, fill_value=np.nan)

        # seasonal MK test on every valid pixel of the chunk
        for k in ind_good[0]:
            d = data[k, :]
            ts = pd.Series(d, index=pd.to_datetime(dt))
            trend_out = mk.seasonal_test(ts, period=frequency, alpha=threshold)
            slopes[k] = trend_out.slope / frequency
            interc[k] = trend_out.intercept
            pvalue[k] = trend_out.p

        np.save(sl_name, [slopes, interc, pvalue])

    if dbg:
        fid.writelines('end parallel loop index ' + str(n) + '\n')
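A hypothetical driver for par_trend, assuming the chunked .npy inputs were prepared as described in the docstring (all names, paths, and values below are illustrative only):

from functools import partial
from multiprocessing import Pool

input_param = {'dt': dt, 'path2data': d_path, 'path2slopes': s_path,
               'data_mask': mask, 'head': 'chunk', 'step': step,
               'nloops': nloops, 'dbg': False, 'fid': None,
               'frequency': 12, 'threshold': 0.05}
with Pool() as pool:
    pool.map(partial(par_trend, input_param=input_param), range(nloops))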
]
labs = [
    'Ice shelf stations, modelled',
    'Inlet stations, modelled',
    'SAM index'
]
lgd = ax.legend(lns, labs, bbox_to_anchor=(0.55, 1.1), loc=2, fontsize=20)
frame = lgd.get_frame()
frame.set_facecolor('white')
for ln in lgd.get_texts():
    plt.setp(ln, color='dimgrey')
lgd.get_frame().set_linewidth(0.0)
plt.subplots_adjust(left=0.24, right=0.85)
plt.savefig('Total_time_srs_modelled_foehn_frequency_vs_SAM.png', transparent=True)
plt.savefig('Total_time_srs_modelled_foehn_frequency_vs_SAM.eps', transparent=True)
plt.show()

foehn_time_srs()

DJF = plt.plot(iceshelf_mod.DJF.values[:-2] / 7.3, label='DJF')
MAM = plt.plot(iceshelf_mod.MAM.values[:-2] / 7.3, label='MAM')
JJA = plt.plot(iceshelf_mod.JJA.values[:-2] / 7.3, label='JJA')
SON = plt.plot(iceshelf_mod.SON.values[:-2] / 7.3, label='SON')

import pymannkendall as mk
result = mk.seasonal_test(inlet_mod.SON)

plt.plot(df.index, df.IWP, label='IWP')
yr_mn = df.IWP.rolling(window=7305, center=True).mean()
plt.plot(yr_mn, label='yearly rolling mean')
plt.show()
WVP_masked = np.mean(all_vars['WVP'].data[:, 40:140, 85:155], axis=(1, 2))
df = pd.DataFrame([
    pd.Series(cl_masked[:7305]),
    pd.Series(WVP_masked[:7305]),
    pd.Series(LWP_masked[:7305]),
    pd.Series(IWP_masked[:7305])
], index=['cl', 'WVP', 'LWP', 'IWP'])
df = df.transpose()
df['datetime'] = pd.date_range(datetime(1998, 1, 1, 0, 0, 0),
                               datetime(2017, 12, 31, 23, 59, 59), freq='D')
df = df.set_index('datetime')

import pymannkendall as mk
result = mk.seasonal_test(df.cl)

plt.plot(df.index, df.IWP, label='IWP')
yr_mn = df.IWP.rolling(window=7305, center=True).mean()
plt.plot(yr_mn, label='yearly rolling mean')
plt.show()


def calc_percent_cloudy(cl_masked):
    '''
    Calculate the percentage of time where the ice-shelf integrated cloud
    fraction falls into one of the following categories, as defined in
    Kay et al. (2008) doi: 10.1029/2011RG000363:

        1. 'Clear': cloud fraction < 0.31
        2. 'Scattered cloud': 0.31 < cloud fraction < 0.75
        3. 'Broken cloud': 0.75 < cloud fraction < 1.0
        4. 'Overcast': cloud fraction == 1.0
    '''
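    # A hedged sketch of the function body implied by the docstring above (the
    # original excerpt is truncated here); bin edges follow the four listed
    # categories, and NaNs are dropped before counting:
    cl = np.asarray(cl_masked)
    cl = cl[~np.isnan(cl)]
    n = cl.size
    clear = 100. * np.sum(cl < 0.31) / n
    scattered = 100. * np.sum((cl >= 0.31) & (cl < 0.75)) / n
    broken = 100. * np.sum((cl >= 0.75) & (cl < 1.0)) / n
    overcast = 100. * np.sum(cl == 1.0) / n
    return clear, scattered, broken, overcast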