Example #1
0
def create_phase_index(debug = False, **kwargs):
	"""
	Average the MEI series over a season for each year and classify the years.

	The series is obtained from load_mei().  Starting at the month
	kwargs['months'][0] (mapped to a month number through slp_tf()) of
	kwargs['startyr'], a window of kwargs['n_mon'] consecutive entries is
	averaged; the window is then shifted by 12 entries for each of the
	kwargs['n_year'] years.

	The seasonal means are sorted and split by sign.  Roughly the top 34% of
	the positive values and the bottom 34% of the negative values define the
	'elnino' and 'lanina' ranges; the remaining positive/negative values
	define 'neutpos'/'neutneg', and 'neutral' spans from just inside the
	negative remainder to the top of the positive remainder.

	Keyword arguments read:
		months   -- sequence; only the first element (season start) is used
		startyr  -- year in which the first window starts
		n_mon    -- number of consecutive entries averaged per year
		n_year   -- number of years (windows)

	debug is accepted for interface compatibility and is not used here.

	Returns (mei_avg, phaseind): mei_avg is an array of length n_year with the
	seasonal means in chronological order; phaseind maps the keys 'elnino',
	'lanina', 'neutral', 'neutpos', 'neutneg' and 'allyears' to boolean
	arrays over mei_avg.
	"""
	# kwargs = kwgroups['mei']
	from numpy import sort
	from numpy import where, arange, zeros, inf
	from transform import slp_tf

	mei = load_mei()
	tran = slp_tf()
	startmon = int(tran[kwargs['months'][0]])
	startyr = kwargs['startyr']
	n_mon = kwargs['n_mon']
	n_year = kwargs['n_year']

	# where() returns a 1-tuple of index arrays.  Adding that tuple to
	# arange() broadcasts to a (matches, n_mon) array, so with a single
	# matching start month extend() appends exactly one index array per year.
	idx_start = where((mei.index.year == startyr) & (mei.index.month == startmon))
	idx = []
	for n in range(n_year):
		idx.extend(arange(n_mon) + idx_start + 12*n)

	mei_avg = zeros((n_year))
	for year, mons in enumerate(idx):
		mei_avg[year] = mei.values[mons].mean()

	# Rank the seasonal means; values exactly equal to zero fall in neither group.
	ranked = sort(mei_avg)
	pos = ranked[ranked>0]
	neg = ranked[ranked<0]
	n_el = int(round(len(pos)*0.34))
	n_la = int(round(len(neg)*0.34))
	n_np = int(len(pos) - n_el)

	# Each entry is an inclusive (low, high) range of seasonal-mean values.
	# NOTE(review): when n_el is 0, pos[-n_el] is pos[0], and when n_la is 0,
	# neg[n_la-1] is neg[-1]; the 'el'/'la' ranges then cover every
	# positive/negative value.  Only possible with very few years -- confirm
	# whether that case matters.
	cutoffs = {
		'la'	: (neg[0], neg[n_la-1]),
		'nn'	: (neg[n_la], neg[-1]),
		'np'	: (pos[0], pos[n_np-1]),
		'el'	: (pos[-n_el], pos[-1]),
		'N'		: (neg[n_la + 1], pos[n_np-1])
	}

	def between(bounds):
		# Boolean mask of the years whose mean lies inside the inclusive range.
		low, high = bounds
		return (mei_avg >= low) & (mei_avg <= high)

	phaseind = {
			'elnino' 	: between(cutoffs['el']),
			'lanina' 	: between(cutoffs['la']),
			'neutral'	: between(cutoffs['N']),
			'neutpos'	: between(cutoffs['np']),
			'neutneg'	: between(cutoffs['nn']),
			'allyears'	: (mei_avg >= -inf)
			}

	return mei_avg, phaseind
Example #2
0
def load_climdiv_dataframes(debug = False, **kwargs):
	"""
	Load climate-division data and build seasonal totals per division.

	Reads the whitespace-delimited text file kwargs['filin'].  The first
	column of each row is a code string whose first four characters are the
	state + division code and whose last four characters are the year; the
	next twelve columns are monthly values.  The rows of each division are
	strung together into one monthly series, -9.99 is replaced by NaN, and
	for every analysis year the months of the requested season are summed.

	Keyword arguments read:
		filin    -- path of the climate-division text file
		months   -- season description: months[0] is the calendar month in
		            which the season starts and len(months) consecutive
		            months are summed.  months[-1] > 12 shifts the analysis
		            to start one year later (presumably a season that runs
		            into the next calendar year -- confirm against
		            create_kwgroups)
		startyr  -- first year of the analysis period
		endyr    -- last year of the analysis period

	debug -- when true, print the range of years and the start string.

	Returns a tuple (alldivDF, regionalDF, dataframes):
		alldivDF    -- DataFrame holding the divisions of every state that
		               appears in importRegions()
		regionalDF  -- dict mapping region -> DataFrame of its divisions
		dataframes  -- dict mapping state name -> DataFrame of its divisions
	"""
	import numpy as np
	import pandas as pd
	from transform import slp_tf

	#_when you call the function, you use **kwgroups['climdiv']
	fp = kwargs['filin']

	#_states maps state code -> state name; divnums lists the division codes
	states = importStates()
	divnums = importDivs()

	#_everything is read as strings so the code column survives untouched
	dat = np.loadtxt(fp, dtype=str)
	climcodes = dat[:,0]
	climdata = dat[:,1:]

	#_first four characters: division code; last four: year.
	#_the characters in between are skipped (per the original note they
	#_identify the variable, which is known to be precipitation)
	divcodes = np.array([item[:4] for item in climcodes])
	years = [item[-4:] for item in climcodes]

	#_string the yearly rows of each division into one long monthly series.
	#_alldata is keyed by division name (i.e. 'Alabama-01')
	alldata = {}
	divnames = []
	for sc in sorted(states):
		for dc in divnums:
			idx = np.where(divcodes == sc + dc)[0] #_np.where returns a tuple
			if len(idx) == 0:
				continue
			divname = states[sc] + '-' + dc
			divnames.append(divname)
			#_builtin float replaces np.float, which newer NumPy no longer has
			alldata[divname] = [float(climdata[row][month])
				for row in idx for month in range(12)]

	#_monthly index spanning the first through the last year found in the file
	nperiods = 12*(int(years[-1]) - int(years[0]) + 1)
	monthly_index = pd.date_range(years[0], periods = nperiods, freq = 'M')

	#_column names are the dictionary keys (division names)
	data = pd.DataFrame.from_dict(alldata)
	data = data.set_index(monthly_index)

	#_replace the missing values with nans to make calculations work correctly
	data = data.replace(to_replace = -9.99, value = np.nan)

	n = len(monthly_index)

	#_transform for start date
	tf = slp_tf()

	if kwargs['months'][-1] > 12:
		start = str(kwargs['startyr'] + 1) + '-' + tf[kwargs['months'][-1]]
		nperiods = kwargs['endyr'] - kwargs['startyr']
		rangeyrs = range(kwargs['startyr'] + 1 , kwargs['endyr'] + 1)
	else:
		start = str(kwargs['startyr']) + '-' + tf[kwargs['months'][-1]]
		nperiods = kwargs['endyr'] - kwargs['startyr'] + 1
		rangeyrs = range(kwargs['startyr'], kwargs['endyr'] + 1)
	if debug:
		print(rangeyrs)
		print('Start string is %s' % (start))
	index = pd.date_range(start, periods = nperiods, freq = '12M')
	newdataframe = pd.DataFrame(columns = index)
	indyears = data.index.year
	indmonths = data.index.month

	#_for each year, flag the season's first month plus the following
	#_len(months)-1 rows, so a season may run across a year boundary
	first_month = kwargs['months'][0]
	n_months = len(kwargs['months'])
	for year in rangeyrs:
		idx = np.repeat(False, n)
		loc = np.where((indyears == year) & (indmonths == first_month))[0]
		for y in range(n_months):
			idx[loc+y] = True
		newdataframe[str(year)] = data[idx].sum()
	newdataframe = newdataframe.T

	#_one data frame per state, holding that state's divisions
	dataframes = {}
	for code in states:
		state = states[code]
		dataframes[state] = pd.DataFrame()
		for div in divnames:
			#_division names end in '-NN', so dropping three characters leaves the state
			if div[:-3] == state:
				dataframes[state][div] = newdataframe[div]

	#_regional frames share the seasonal index; taking it from newdataframe
	#_avoids depending on any particular state being present
	regions = importRegions()
	regionalDF = {}
	alldivDF = pd.DataFrame(index = newdataframe.index)
	for region in regions:
		regionalDF[region] = pd.DataFrame(index = newdataframe.index)
		for state in regions[region]:
			for div in dataframes[state]:
				alldivDF[div] = dataframes[state][div]
				regionalDF[region][div] = dataframes[state][div]

	return alldivDF, regionalDF, dataframes
Example #3
0
def load_slp(newFormat = False, debug = False, anomalies = True, **kwargs):
	"""
	This function loads HADSLP2r data.

	Seasonal means of the 'slp' variable are computed for each year and then
	standardized per grid point with sklearn's scale().  The result is cached
	as a pickle under EV['DATA'] + '/nipa/SLP/'; when that file already
	exists it is loaded instead of being rebuilt from the netCDF file.

	Keyword arguments read:
		months   -- season description; months[0] and months[-1] are used for
		            the start month and for the cache file name
		startyr  -- year in which the first seasonal window starts
		endyr    -- used only in the cache file name
		n_mon    -- number of consecutive months averaged per year
		n_year   -- number of years (windows)

	newFormat -- when true, return a namedtuple seasonal_var(data, lat, lon)
	             instead of the dict
	debug     -- when true, print the selected dates and averaged indices
	anomalies -- accepted for interface compatibility; not used here

	Returns a dict with keys 'grid' (array of shape n_year x nlat x nlon),
	'lat' and 'lon', or the equivalent namedtuple when newFormat is set.
	"""
	from transform import slp_tf, int_to_month
	from netCDF4 import Dataset
	from sklearn.preprocessing import scale
	from numpy import arange, zeros, where
	from os.path import isfile
	from collections import namedtuple
	import pandas as pd
	import pickle

	def _package(slpdata):
		# Hand back the dict unchanged, or wrapped in the namedtuple form.
		if not newFormat:
			return slpdata
		seasonal_var = namedtuple('seasonal_var', ('data','lat','lon'))
		return seasonal_var(slpdata['grid'], slpdata['lat'], slpdata['lon'])

	transform = slp_tf()	#This is for transforming kwargs into DLargs

	DLargs = {
		'startmon'	: transform[kwargs['months'][0]],
		'endmon'	: transform[kwargs['months'][-1]],
		'startyr'	: str(kwargs['startyr']),
		'endyr'		: str(kwargs['endyr']),
		'nbox'		: str(kwargs['n_mon'])
			}
	i2m = int_to_month() #_Use in naming convention
	fp = EV['DATA'] + '/nipa/SLP/' + i2m[kwargs['months'][0]] + \
		DLargs['startyr'] + '_' + i2m[kwargs['months'][-1]] + \
		DLargs['endyr'] + '_nbox_' + DLargs['nbox']

	if isfile(fp):
		# Pickles are binary data: open in 'rb' so this also works on Python 3.
		# NOTE: pickle.load executes whatever the file contains; the cache
		# directory must only hold files written by this function.
		with open(fp, 'rb') as f:
			slpdata = pickle.load(f)
		return _package(slpdata)
	print('Creating new SLP pickle from netCDF file')

	#_Next block takes the netCDF file and extracts the time to make
	#_a time index.
	nc_fp = EV['DATA'] + '/netCDF/slp.mnmean.real.nc'
	dat = Dataset(nc_fp)
	try:
		n_times = len(dat.variables['time'][:])
		lat = dat.variables['lat'][:]
		lon = dat.variables['lon'][:]
		slp = dat.variables['slp'][:]
	finally:
		# Everything needed has been copied into memory; release the file.
		dat.close()

	extractargs = {
		'start'		: '1850-01',
		'periods'	: n_times,
		'freq'		: 'M',
			}
	timeindex = pd.date_range(**extractargs)

	#Need to get start and end out of time index
	startyr = kwargs['startyr']
	startmon = int(DLargs['startmon'])

	# where() returns a 1-tuple of index arrays.  Adding that tuple to
	# arange() broadcasts to a (matches, n_mon) array, so with a single
	# matching start month extend() appends exactly one index array per year.
	# (Per the original author's note, the plain year/month boolean mask used
	# for the SST OPeNDAP loader does not work for this data.)
	idx_start = where((timeindex.year == startyr) & (timeindex.month == startmon))
	idx = []
	for n in range(kwargs['n_year']):
		idx.extend(arange(kwargs['n_mon']) + idx_start + 12*n)

	if debug:
		print(timeindex[idx][:10])

	nlat = len(lat)
	nlon = len(lon)
	slpavg = zeros((kwargs['n_year'], nlat, nlon))

	for year, mons in enumerate(idx):
		slpavg[year] = slp[mons].mean(axis=0)
		if debug:
			print('Averaging  %s' % (mons,))

	#WHERE TO SCALE THE DATA?
	# Each grid point's series across years is standardized independently.
	for i in range(nlat):
		for j in range(nlon):
			slpavg[:,i,j] = scale(slpavg[:,i,j])
	slpdata = {
			'grid'	:	slpavg,
			'lat'	:	lat,
			'lon'	:	lon
			}
	with open(fp, 'wb') as f:
		pickle.dump(slpdata, f)
	print('SLP data saved to %s' % (fp))
	return _package(slpdata)