def sig_test(
    list_dir="",
    data_dir="",
    monte=True,
    sttest=False,
    tcrit=0,
    list_name="era40_blocking_thpv2.list",
    high=True,
    month="thpv2",
    trials=1000,
    cutoff=0.8,
):
    """Flag grid points where the solar composite differs from climatology.

    Two methods are available. With ``sttest`` the per-year differences are
    handed to ``ttest`` and its result is returned unchanged. Otherwise, with
    ``monte``, a bootstrap is run: ``trials`` times, ``len(solar_data)`` years
    are drawn at random (with replacement) from the full record, averaged,
    and the climatology is subtracted. The observed composite difference is
    then ranked against those trial values at every grid point.

    Parameters
    ----------
    list_dir : str
        Prefix prepended to the list file names.
    data_dir : str
        Passed to ``stdata`` as ``directory`` and to ``read_list`` as its
        second argument.
    monte : bool
        Run the Monte Carlo bootstrap (only reached when ``sttest`` is false).
    sttest : bool
        Run the t-test instead; takes precedence over ``monte``.
    tcrit : number
        Forwarded to ``ttest`` as its third argument.
    list_name : str
        Name of the list file for the full record. The solar subset list is
        the same name prefixed with ``"high_"`` or ``"low_"``.
    high : bool
        Selects the ``"high_"`` list when true, the ``"low_"`` list otherwise.
    month : str
        Passed to ``stdata`` as ``monthly``.
    trials : int
        Number of bootstrap trials; must be at least 1.
    cutoff : float
        Width of the central band of the bootstrap distribution that is
        treated as not significant. A point is flagged when the observed rank
        fraction is below ``(1 - cutoff) / 2`` or above
        ``cutoff + (1 - cutoff) / 2``.

    Returns
    -------
    The return value of ``ttest`` when ``sttest`` is true; an integer array
    of 0/1 flags (one per grid point) when ``monte`` is true; ``None`` when
    neither method is selected or after an ``IOError``/``ValueError`` has
    been caught and printed.

    Notes
    -----
    ``np``, ``plt``, ``ttest``, ``xlabel``, ``ylabel`` and ``title`` are not
    defined in this function and are expected to exist at module level.
    """
    try:
        from open import stdata
        from open import read_list

        # the solar subset list carries a "high_" / "low_" prefix
        prefix = "high" if high else "low"
        name = list_dir + prefix + "_" + list_name

        # yearly fields for the solar subset and for the full record
        solar_data = stdata(name, directory=data_dir, monthly=month)
        all_data = stdata(list_dir + list_name, directory=data_dir, monthly=month)

        # climatological field and the field read from the solar list
        clim = read_list(list_dir + list_name, data_dir)
        # NOTE(review): b_hls is never used below; the call is kept because
        # read_list's side effects are not visible from here.
        b_hls = read_list(name, data_dir)

        # test statistic: solar-year fields minus climatology
        diff = np.array(solar_data) - np.array(clim)

        # t-test to find significant lat-lon points at a given confidence level
        if sttest:
            return ttest(np.zeros(diff.shape), diff, tcrit)

        # Monte Carlo bootstrap producing a lat/lon array of significances
        if monte:
            from random import randint

            if trials < 1:
                raise ValueError("trials must be a positive integer")
            n_sample = len(solar_data)
            n_years = len(all_data)
            if n_sample == 0 or n_years == 0:
                raise ValueError("no yearly fields available to resample")

            # the debugging plot below indexes point [6][18], so the grid
            # size stays fixed
            expected_shape = (20, 96)

            # one slab per trial, allocated once instead of growing the
            # array by concatenation on every iteration
            trial_stats = np.empty(expected_shape + (trials,))
            for trial in range(trials):
                total = np.zeros(expected_shape)
                for _ in range(n_sample):
                    # draw from the FULL record, not just its first
                    # n_sample entries
                    year = randint(0, n_years - 1)
                    if all_data[year].shape != expected_shape:
                        # reported through the ValueError handler below
                        # instead of silently exiting the interpreter
                        raise ValueError(
                            "unexpected field shape %s for year index %d"
                            % (all_data[year].shape, year)
                        )
                    total += all_data[year]
                trial_stats[:, :, trial] = total / n_sample - clim

            # collapse per-year differences to one composite so the observed
            # statistic has the same form as each trial statistic
            observed = diff.mean(axis=0) if diff.ndim == 3 else diff

            # debugging output: bootstrap distribution at one grid point
            print(trial_stats.shape)
            fig = plt.figure()
            plt.hist(trial_stats[6][18])
            axes = plt.gca()
            axes.set_xlabel(xlabel)
            axes.set_ylabel(ylabel)
            plt.title(title)
            fig.show()
            print(observed[6][18])

            # append the observed field as the last slab and rank along the
            # trial axis; argsort of argsort turns values into ranks, so the
            # last slab holds the rank of the observed value (0..trials)
            observed_slab = observed[:, :, np.newaxis]
            stacked = np.concatenate((trial_stats, observed_slab), axis=2)
            ranks = stacked.argsort().argsort()[:, :, -1].copy()

            # where every trial equals the observed value (e.g. all zero) the
            # rank is arbitrary, so place it mid-distribution; genuine
            # extremes keep their rank and can be flagged
            degenerate = (trial_stats == observed_slab).all(axis=2)
            ranks[degenerate] = trials // 2

            # transform ranks into fractions of the bootstrap distribution
            sig = ranks.astype(float) / float(trials)

            # two-tailed test: flag points in either tail
            lower = (1 - cutoff) / 2.0
            upper = cutoff + lower
            return ((sig > upper) | (sig < lower)).astype(int)
    except IOError as err:
        print("File error: " + str(err))
    except ValueError as err:
        print("Value Error: " + str(err))
# critical t value (to be entered manually) tcrit = 2.03 sig_lvl = 0.05 cutoff = 0.9 # output file name and othe graphing options graph = True stype = 'ttest' output = '/media/jonathan/KINGSTON/blocking/graphs/test.' filled = False # open and read list into an array import open all_data = open.stdata(king_list+str(run)+'/'+str(run)+'.1860-2010.thpv2_months.list',dat,monthly='thpv2',daily=False,total=False,numpy=True) # extract the high/low time values # identify start start = 1860 jump1 = 1940 jump2 = 1950 jump3 = 2010 from solar import quantile # check TSI wrt time def check_TSI(start=1860,end=2100,compress=True,graph=False): # open data files
try: if arr != True from open import open_pkl #read in x-y data opt = [] data_xy = open_pkl(king_dat,'era40.gga'+v+'.year-2002.month-01.b.'+type+'_003.duration_ge_5_day.pkl') Lon,Lat = data_xy['lon']['lon'],data_xy['lat']['lat'] Lon = np.append(Lon,360+Lon[0]) #print Lon X,Y = meshgrid(Lon,Lat) opt.append(X) opt.append(Y) # #read in data from open import stdata all_data = stdata('[DIR]/era40_blocking_thpv2.list',directory='/media/jonathan/KINGSTON/blocking/data/pkl_files/blocking/',monthly='thpv2') from solar import years yrs = np.array(years()['SCmin'])-1957 if high == True: yrs = np.array(years()['SCmax'])-1957 data = np.mean(all_data[yrs],axis=0) # data = read_list(king+'high_era40_blocking_'+type+'.list',king_dat) #generate listname listnm = 'era40_blocking_'+str(type)+'_high_blk' stype = 'high' elif high != True: yrs = np.array(years()['SCmin'])-1957 data = np.mean(all_data[yrs],axis=0) # data = read_list(king+'low_era40_blocking_'+type+'.list',king_dat) #generate listname