Example #1
0
def sig_test(
    list_dir="",
    data_dir="",
    monte=True,
    sttest=False,
    tcrit=0,
    list_name="era40_blocking_thpv2.list",
    high=True,
    month="thpv2",
    trials=1000,
    cutoff=0.8,
):
    """Flag grid points where a solar subset differs significantly from the full record.

    Data are loaded with the project helpers ``stdata`` and ``read_list``
    (module ``open``): once for the full list ``list_dir + list_name`` and
    once for the subset list, which has the same name prefixed with
    ``high_`` or ``low_``.

    Two tests are available; ``sttest`` is checked first and returns early:

    * ``sttest``: returns ``ttest(zeros(diff.shape), diff, tcrit)``, where
      ``diff`` is the subset data minus the ``read_list`` result for the
      full list and ``ttest`` is a module-level helper.
    * ``monte``: bootstrap. Every trial averages ``len(solar_data)`` grids
      drawn at random (with replacement) from the full record and subtracts
      the climatology. The rank of ``diff`` among the trial values, divided
      by ``trials``, is compared with the two-tailed limits derived from
      ``cutoff``.

    The Monte Carlo branch also prints the shape of the trial array and
    ``diff[6][18]``, and shows a histogram of the trial values at grid
    point ``[6][18]``. It relies on the module-level names ``np``, ``plt``,
    ``xlabel``, ``ylabel`` and ``title``.

    Args:
        list_dir: Prefix prepended to the list file names.
        data_dir: Directory handed to ``stdata`` / ``read_list``.
        monte: Run the Monte Carlo bootstrap test.
        sttest: Run the t-test instead (takes precedence over ``monte``).
        tcrit: Critical t value forwarded to ``ttest``.
        list_name: File name of the full-record list.
        high: Select the ``high_`` subset list when true, else ``low_``.
        month: Forwarded to ``stdata`` as ``monthly``.
        trials: Number of bootstrap trials.
        cutoff: Central fraction of the ranks treated as not significant;
            ``(1 - cutoff) / 2`` is cut from each tail.

    Returns:
        The ``ttest`` result when ``sttest`` is set; otherwise, when
        ``monte`` is set, an integer array with 1 at significant grid points
        and 0 elsewhere. ``None`` when neither test is requested or when an
        ``IOError`` / ``ValueError`` was caught and printed.
    """
    try:
        from open import stdata
        from open import read_list
        from numpy import zeros

        # the subset list is the full list's name with a high_/low_ prefix
        hh = "high" if high else "low"
        name = list_dir + hh + "_" + list_name

        # per-year data for the subset and for the full record
        solar_data = stdata(name, directory=data_dir, monthly=month)
        all_data = stdata(list_dir + list_name, directory=data_dir, monthly=month)

        # blocking frequency of the full record (climatology) and of the subset
        clim = read_list(list_dir + list_name, data_dir)
        # NOTE(review): b_hls is read but never used below -- confirm whether
        # the Monte Carlo statistic was meant to be b_hls - clim.
        b_hls = read_list(name, data_dir)

        # test statistic
        # NOTE(review): if solar_data holds one grid per year, diff has a
        # leading year axis and the reshape in the Monte Carlo branch raises
        # ValueError -- confirm the intended statistic.
        diff = np.array(solar_data) - np.array(clim)

        # t-test for significant lat/lon points at the given critical value
        if sttest:
            return ttest(zeros(diff.shape), diff, tcrit)

        # Monte Carlo bootstrap giving a lat/lon array of significances
        if monte:
            from random import randint

            n_subset = len(solar_data)
            n_years = len(all_data)

            # Validate every year once. Raising (instead of exiting with
            # status 0) lets the handler below report the problem.
            for year_grid in all_data:
                if year_grid.shape != (20, 96):
                    raise ValueError(
                        "unexpected grid shape %s, expected (20, 96)"
                        % (year_grid.shape,)
                    )

            # One statistic per trial: mean of n_subset random years minus
            # the climatology. Years are drawn from the whole record
            # (randint is inclusive at both ends).
            samples = []
            for _trial in range(trials):
                total = zeros(all_data[0].shape)
                for _draw in range(n_subset):
                    total += all_data[randint(0, n_years - 1)]
                samples.append(total / n_subset - clim)

            # stack the trials along a third axis: (lat, lon, trial)
            values = np.dstack(samples)

            # diagnostic output for a single grid point
            print(values.shape)
            fig = plt.figure()
            plt.hist(values[6][18])
            axes = plt.gca()
            axes.set_xlabel(xlabel)
            axes.set_ylabel(ylabel)
            plt.title(title)
            fig.show()
            print(diff[6][18])

            # Append the observed statistic as the last entry along the trial
            # axis. argsort applied twice turns values into ranks, so the
            # last entry becomes the rank of the observation among the trials.
            observed = diff.reshape(values.shape[0], values.shape[1], 1)
            ranks = np.concatenate((values, observed), 2).argsort().argsort()
            observed_rank = ranks[:, :, -1]

            # A rank equal to `trials` (observation sorted last) is reset to
            # the middle rank; this was added for points where all values tie
            # at zero.
            # NOTE(review): it also resets points where the observation truly
            # exceeds every trial, so those are never flagged in the upper
            # tail -- confirm this is intended.
            observed_rank[observed_rank == trials] = trials // 2

            # transform ranks into probabilities
            sig = observed_rank.astype(float) / float(trials)

            # two-tailed test: significant when outside the central `cutoff`
            lower = (1 - cutoff) / 2.0
            significant = (sig > cutoff + lower) | (sig < lower)
            return significant.astype(int)

    except IOError as err:
        print("File error: " + str(err))

    except ValueError as err:
        print("Value Error: " + str(err))
Example #2
0
# statistical settings; the critical t value has to be entered manually
tcrit, sig_lvl, cutoff = 2.03, 0.05, 0.9

# graphing options and output file name prefix
graph, filled = True, False
stype = 'ttest'
output = '/media/jonathan/KINGSTON/blocking/graphs/test.'

# read the list for this run into an array
import open
all_data = open.stdata(
    king_list + str(run) + '/' + str(run) + '.1860-2010.thpv2_months.list',
    dat,
    monthly='thpv2',
    daily=False,
    total=False,
    numpy=True)

# extract the high/low time values

# start value and jump markers (presumably years -- verify against their use)
start, jump1, jump2, jump3 = 1860, 1940, 1950, 2010

from solar import quantile

# check TSI wrt time

def check_TSI(start=1860,end=2100,compress=True,graph=False):
	# open data files 
Example #3
0
		
	try:
		if arr != True
		from open import open_pkl
		#read in x-y data
		opt = []
		data_xy = open_pkl(king_dat,'era40.gga'+v+'.year-2002.month-01.b.'+type+'_003.duration_ge_5_day.pkl')
		Lon,Lat = data_xy['lon']['lon'],data_xy['lat']['lat']
		Lon = np.append(Lon,360+Lon[0])
		#print Lon
		X,Y = meshgrid(Lon,Lat)
		opt.append(X)
		opt.append(Y)
		# #read in data
		from open import stdata
		all_data = stdata('[DIR]/era40_blocking_thpv2.list',directory='/media/jonathan/KINGSTON/blocking/data/pkl_files/blocking/',monthly='thpv2')
		from solar import years
		yrs = np.array(years()['SCmin'])-1957

		if high == True:
			yrs = np.array(years()['SCmax'])-1957
			data = np.mean(all_data[yrs],axis=0)
			# data = read_list(king+'high_era40_blocking_'+type+'.list',king_dat)
			#generate listname
			listnm = 'era40_blocking_'+str(type)+'_high_blk'
			stype = 'high'
		elif high != True:
			yrs = np.array(years()['SCmin'])-1957
			data = np.mean(all_data[yrs],axis=0)
			# data = read_list(king+'low_era40_blocking_'+type+'.list',king_dat)
			#generate listname