def diff1(king, king_dat, output, high=True, type='thpv2', v='v', monte=False,
          sttest=True, tcrit=0.87, graph=True, filled=False, sig=False,
          monthly=1, trials=1000, cutoff=0.90):
    """Difference high/low-solar blocking frequency from climatology and plot it.

    METHOD (25/11/13):
      * take high/low solar data
      * compute average blocking frequency for each lat/lon point
      * difference it from the all-time (climatological) data
      * plot this difference using Basemap and compare to Woollings

    Parameters
    ----------
    king : str
        Prefix prepended to the ``.list`` file names; also forwarded to
        ``sig_test`` as ``list_dir``.
    king_dat : str
        Passed to ``open_pkl`` / ``read_list`` and forwarded to ``sig_test``
        as ``data_dir``.
    output : str
        Prefix of the PNG file name that is written when ``graph`` is set.
    high : bool
        ``True`` selects the ``high_`` list, anything else the ``low_`` list.
    type : str
        Tag inserted into the list and pickle file names.
    v : str
        Tag inserted into the grid pickle file name.
    monte, sttest, tcrit, trials, cutoff
        Forwarded unchanged to ``sig_test``; ``monte`` / ``sttest`` also
        choose the tag used in the output file name.
    graph : bool
        Draw and save the map when true.
    filled : bool
        Draw filled contours (with line contours on top) instead of plain
        contour lines.
    sig : bool
        Run ``sig_test`` and overlay its mask on the map.
    monthly
        Accepted for backward compatibility; not used by this function.

    Returns
    -------
    list
        ``[X, Y, diff]`` where ``X``/``Y`` are the meshgridded lon/lat axes
        and ``diff`` is ``data - clim``; all three carry one extra
        wrap-around longitude column.
        ``0`` is returned when the two data arrays differ in size, and
        ``None`` when an ``IOError`` / ``ValueError`` is caught.
    """
    try:
        from open import open_pkl

        # Read the lon/lat axes and append a wrap-around longitude
        # (first longitude + 360) so contours close across the seam.
        grid = open_pkl(
            king_dat,
            'era40.gga' + v + '.year-2002.month-01.b.' + type
            + '_003.duration_ge_5_day.pkl')
        Lon, Lat = grid['lon']['lon'], grid['lat']['lat']
        Lon = np.append(Lon, 360 + Lon[0])
        X, Y = np.meshgrid(Lon, Lat)
        opt = [X, Y]

        # `high == True` (not truthiness) is kept on purpose: sig_test makes
        # the same comparison, so both always agree on the high/low choice.
        stype = 'high' if high == True else 'low'
        listnm = 'era40_blocking_' + str(type) + '_' + stype + '_blk'
        data = read_list(
            king + stype + '_era40_blocking_' + type + '.list', king_dat)
        clim = read_list(king + 'era40_blocking_' + type + '.list', king_dat)

        # Both fields must live on the same lat/lon grid.
        if len(data) != len(clim) or len(data[0]) != len(clim[0]):
            print("Error: Array lengths don't match\nData: "
                  + str(len(data)) + " " + str(len(data[0]))
                  + "\nClim: " + str(len(clim)) + " " + str(len(clim[0])))
            return 0

        # Difference field; the last column repeats the first so that it
        # lines up with the extra longitude appended above.
        data_arr = np.asarray(data, dtype=float)
        clim_arr = np.asarray(clim, dtype=float)
        diff = np.zeros((data_arr.shape[0], data_arr.shape[1] + 1))
        diff[:, :-1] = data_arr - clim_arr
        diff[:, -1] = diff[:, 0]
        opt.append(diff)

        regions = None
        if sig:
            try:
                from sig_test import sig_test
                # Test the same list that is plotted and honour the caller's
                # trial count instead of fixed 'thpv2' / 1000 values.
                regions = sig_test(
                    list_name='era40_blocking_' + str(type) + '.list',
                    list_dir=king, monte=monte, sttest=sttest, tcrit=tcrit,
                    data_dir=king_dat, high=high, trials=trials,
                    cutoff=cutoff)
                if regions is None:
                    # sig_test reports its own errors and then yields None.
                    print("Error: sig_test returned no significance mask; "
                          "skipping overlay")
                else:
                    # Add the wrap-around longitude column to the mask too.
                    regions = np.concatenate((regions, regions[:, :1]), 1)
            except ValueError as err:
                regions = None
                print("Value Error: " + str(err))

        if graph:
            # Tag for the output file name. The t-test wins when both flags
            # are set, mirroring sig_test, which checks sttest first. The tag
            # is empty when neither test is selected.
            sgtype = ''
            if monte == True:
                sgtype = 'monte'
            if sttest == True:
                sgtype = 'ttest'
            # Significance level in percent. round() avoids float truncation
            # (e.g. 100 * (1 - 0.9) evaluates to 9.999...).
            sig_pct = int(round(100 * (1 - cutoff)))
            output = (str(output) + 'blk' + str(listnm) + str(sgtype)
                      + str(sig_pct) + '.png')

            from mpl_toolkits.basemap import Basemap
            import matplotlib.pyplot as plt

            fig, ax = plt.subplots()
            # North-polar azimuthal equidistant map with low resolution
            # coastlines, bounded by the last latitude of the data grid.
            bmap = Basemap(boundinglat=Lat[-1], lon_0=0, projection='npaeqd',
                           resolution='l', round=True)
            x, y = bmap(X, Y)

            # draw coastlines, country boundaries and the projection limb
            bmap.drawcoastlines(linewidth=0.25)
            bmap.drawcountries(linewidth=0.25)
            bmap.drawmapboundary()

            if filled:
                bmap.contour(x, y, diff, 10, linestyles='solid',
                             colors='black')
                # NOTE(review): the field is negated for the fill while
                # vmin/vmax come from the un-negated field - presumably to
                # flip the RdBu colours; kept as in the original, confirm.
                p = bmap.contourf(x, y, -diff, 10, cmap=cm.RdBu,
                                  vmin=diff.min(), vmax=diff.max(),
                                  alpha=0.5)
            else:
                levels = np.arange(-100, 102, 2) / float(100)
                p = bmap.contour(x, y, diff, levels, colors='k')
            fig.colorbar(p, ax=ax)
            ax.set_title('blk' + stype + '-clim')

            if sig and regions is not None:
                # Hide the non-significant points (values below 0.5) and
                # shade what remains.
                mask = np.ma.array(regions)
                mask[mask < 0.5] = np.ma.masked
                bmap.contourf(x, y, mask, 1, cmap=cm.gray_r)
                ax.set_title('blk' + str(stype) + '-clim (sig '
                             + str(sig_pct) + '%)')
            fig.savefig(str(output))
        return opt
    except IOError as err:
        print("File error: " + str(err))
    except ValueError as err:
        print("Value Error: " + str(err))
def sig_test(
    list_dir="",
    data_dir="",
    monte=True,
    sttest=False,
    tcrit=0,
    list_name="era40_blocking_thpv2.list",
    high=True,
    month="thpv2",
    trials=1000,
    cutoff=0.8,
):
    """Flag lat/lon points where the high/low-solar field differs from climatology.

    The solar list path is ``list_dir + "high_"/"low_" + list_name``; the
    climatological list is ``list_dir + list_name``.

    * ``sttest == True``: returns whatever ``ttest(zeros, diff, tcrit)``
      returns (this branch is checked first).
    * ``monte == True``: runs ``trials`` resampling trials, ranks the observed
      difference among the trial values at every grid point and returns an
      integer array holding 1 where the rank fraction lies outside the
      two-tailed ``cutoff`` band and 0 elsewhere.
    * Otherwise, or when an ``IOError`` / ``ValueError`` is caught and
      printed, the function falls through and returns ``None``.

    NOTE(review): ``np``, ``plt``, ``ttest``, ``s_``, ``xlabel``, ``ylabel``
    and ``title`` are not defined in this function - presumably module-level
    names (e.g. a star import); confirm.
    NOTE(review): the statement nesting below was reconstructed from
    whitespace-collapsed source; confirm against the original file.
    """
    try:
        from open import stdata
        from open import read_list
        from numpy import zeros

        # determining whether list is high/low solar data
        hh = "high"
        if high != True:
            hh = "low"
        # filepath of solar data list
        name = list_dir + hh + "_" + list_name
        # extracting data to arrays
        # NOTE(review): the monte branch indexes all_data[year] and checks for
        # a (20, 96) shape, so stdata presumably returns one field per year -
        # confirm.
        solar_data = stdata(name, directory=data_dir, monthly=month)
        all_data = stdata(list_dir + list_name, directory=data_dir, monthly=month)
        # blocking frequency of high/low solar and climatological blocking frequency
        clim = read_list(list_dir + list_name, data_dir)
        # NOTE(review): b_hls is never used below - possibly the intended
        # operand of the test statistic in the monte branch; confirm.
        b_hls = read_list(name, data_dir)
        # test statistic
        diff = np.array(solar_data) - np.array(clim)
        # t-test to find significant lat-lon points at a specific confidence level
        if sttest == True:
            opt = ttest(zeros(diff.shape), diff, tcrit)
            return opt
        # monte carlo bootstrap method for determining a lat/lon array of significances
        if monte == True:
            from random import randint

            # generate trial values for analysis
            for trial in range(trials):
                # generate len(solar_data) random years and initial zero array
                test = zeros(all_data[0].shape)
                for i in range(len(solar_data)):
                    # NOTE(review): the index is drawn from
                    # 0..len(solar_data)-1 but is used on all_data, so only
                    # the leading entries of all_data can be picked - confirm
                    # whether len(all_data) was intended.
                    year = randint(0, len(solar_data) - 1)
                    # check for correct shape, exit if not
                    # (exit(0) ends the whole process with a success status)
                    if all_data[year].shape != (20, 96):
                        exit(0)
                    # append each randomly generated year to test array
                    test += all_data[year]
                # first trial condition
                if trial == 0:
                    # generate statistic
                    values = test / len(solar_data) - clim
                    # values = np.array(diff_test(test/len(solar_data),clim))
                    # reshape for concatenation
                    values.shape = (len(values), len(values[0]), 1)
                # same method as above for subsequent trials
                elif trial != 0:
                    tmp = np.array(test / len(solar_data) - clim)
                    # tmp = np.array(diff_test(test/len(solar_data),clim))
                    tmp.shape = (len(values), len(values[0]), 1)
                    # concatenate arrays to form final array
                    # (the stack is copied on every trial, so its cost grows
                    # with the number of trials already done)
                    values = np.concatenate((values, tmp), 2)
            print values.shape
            # Debug output: histogram of the trial values at grid index [6][18].
            fig = plt.figure()
            plt.hist(values[6][18])
            axes = plt.gca()
            # axes = fig.add_axes([0.1, 0.1, 0.8, 0.8]) # left, bottom, width, height (range 0 to 1)
            # axes.plot(x, y, 'r')
            axes.set_xlabel(xlabel)
            axes.set_ylabel(ylabel)
            plt.title(title)
            fig.show()
            print diff[6][18]
            # reshape difference array for concatenation
            # NOTE(review): this in-place reshape only succeeds if diff holds
            # exactly one value per lat/lon point; otherwise it raises the
            # ValueError handled at the bottom - confirm diff's shape here.
            diff.shape = (len(values), len(values[0]), 1)
            # return the index within each element of the array that will sort the values
            tmp = np.mean(values, axis=2)
            # argsort of an argsort turns each value into its rank along the
            # last axis; diff is the final entry on that axis.
            values = np.concatenate((values, diff), 2).argsort().argsort()  # this second argsort is essential
            tmp2 = values
            # print values
            # account for odd behaviour for when both are zero
            for lat in range(len(values)):
                for lon in range(len(values[lat])):
                    # NOTE(review): with the diff == 0 condition commented
                    # out, every point where the observed difference ranks
                    # highest is reset to the middle rank - confirm this is
                    # intended.
                    if values[lat][lon][-1] == trials:  # and diff[lat][lon][0] == 0:
                        values[lat][lon][-1] = trials / 2
                    # if lat == 19:
                    # print values[19][lon][-1],diff[19][lon]
            # isolate index that the difference array will need when sorting
            # (drops everything but the last entry of axis 2, i.e. diff's rank)
            sig = np.delete(values, s_[:-1], 2)
            # reshape array to lat/lon style
            sig.shape = (len(values), len(values[0]))
            # transform indices into probabilities
            sig = sig.astype(float) / float(trials)
            #
            # alternate method - I consider this to be incorrect but did produce okay graphs
            # sig = (values == trials).nonzero()[-1]
            # sig = sig.astype(float)/float(trials)
            # generate an array of 1s and 0s depending if in range of two tailed significance
            # values < lower limit
            lower = (1 - cutoff) / 2
            opt = zeros(sig.shape)
            for lat in range(len(values)):
                for lon in range(len(values[lat])):
                    # upper tail
                    if sig[lat][lon] > cutoff + lower:
                        opt[lat][lon] = 1
                    # lower tail
                    if sig[lat][lon] < lower:
                        opt[lat][lon] = 1
            # opt2 = - (sig - (1+lower)).astype(int)
            # #values > upper limit
            # opt2 = opt2 + (sig + (1-cutoff)/2).astype(int)
            # for lat in range(len(values)):
            # for lon in range(len(values[lat])):
            # if opt1[lat][lon] != opt2[lat][lon]:
            # print lat*3.72,lon*3.75,diff[lat][lon],tmp[lat][lon],opt1[lat][lon],opt2[lat][lon],tmp2[lat][lon][-1]
            return opt.astype(int)
    except IOError as err:
        print "File error: " + str(err)
    except ValueError as err:
        print "Value Error: " + str(err)
# NOTE(review): this is a free-standing fragment. It contains `return 0` and
# uses `high`, `all_data`, `type`, `king`, `king_dat` and `opt` without
# defining them, so it presumably belongs inside a function whose `def` line
# is not visible here - confirm where it lives.
#
# Build the high/low-solar mean field from selected entries of all_data,
# difference it from the climatological list and append the result to opt.
if high == True:
    # presumably converts the 'SCmax' calendar years into offsets from 1957
    # so they can index all_data - confirm
    yrs = np.array(years()['SCmax'])-1957
    data = np.mean(all_data[yrs],axis=0)
    # data = read_list(king+'high_era40_blocking_'+type+'.list',king_dat)
    # generate listname
    listnm = 'era40_blocking_'+str(type)+'_high_blk'
    stype = 'high'
elif high != True:
    # same as above, using the 'SCmin' years
    yrs = np.array(years()['SCmin'])-1957
    data = np.mean(all_data[yrs],axis=0)
    # data = read_list(king+'low_era40_blocking_'+type+'.list',king_dat)
    # generate listname
    listnm = 'era40_blocking_'+str(type)+'_low_blk'
    stype = 'low'
clim = read_list(king+'era40_blocking_'+type+'.list',king_dat)
# checks of data read in: both fields must have the same dimensions
if len(data) != len(clim) or len(data[0]) != len(clim[0]):
    print "Error: Array lengths don't match\nData: "+str(len(data)),str(len(data[0]))+"\nClim: "+str(len(clim)),str(len(clim[0]))
    return 0
# compute difference array (one extra column is reserved per row)
diff = zeros(shape=(len(data),len(data[0])+1))
test = []
for lat in range(0,len(data)):
    for lon in range(0,len(data[0])):
        diff[lat][lon] = -clim[lat][lon] + data[lat][lon]
# final value for diff: the extra last column repeats column 0
for lat in range(0, len(data)):
    diff[lat][-1] = -clim[lat][0] + data[lat][0]
opt.append(diff)