def openDAPsst(version = '3b', debug = False, anomalies = True, newFormat = False, **kwargs):
    """
    Return seasonal ERSST anomalies, using a local pickle cache when present
    and otherwise downloading them from the IRI data library over OPeNDAP.

    Parameters
    ----------
    version : str
        ERSST version tag; inserted into the IRI URL ('.version<version>')
        and appended to the cache file name.
    debug : bool
        If True, print the computed number of seasons and months.
    anomalies : bool
        Accepted for interface compatibility; not used by this function
        (the '.anom' variable is always requested).
    newFormat : bool
        If True, return a ``seasonal_var(data, lat, lon)`` namedtuple.
        If False, return a dict with keys 'grid', 'lat' and 'lon'.
    **kwargs
        Must contain:
          'months'  - sequence of month indices; the first and last entries
                      are mapped to month names via transform.int_to_month()
          'startyr' - first year of the requested range
          'endyr'   - last year of the requested range
          'n_mon'   - number of months averaged per season (boxAverage width)

    Notes
    -----
    Relies on the module-level mapping ``EV`` for the data directory
    (``EV['DATA']``). The cache is read with ``pickle.load``; only point
    ``EV['DATA']`` at a trusted location.
    """
    from transform import int_to_month
    from os.path import isfile
    from pydap.client import open_url
    from numpy import arange
    import pickle
    from collections import namedtuple

    SSTurl = 'http://iridl.ldeo.columbia.edu/SOURCES/.NOAA/.NCDC/.ERSST/.version' + version + '/' + \
        '.anom/T/%28startmon%20startyr%29%28endmon%20endyr%29RANGEEDGES/T/nbox/0.0/boxAverage/dods'

    n_mon = kwargs['n_mon']
    i2m = int_to_month()

    #_Values substituted for the placeholder tokens of the same name in SSTurl
    DLargs = {
        'startmon'  : i2m[kwargs['months'][0]],
        'endmon'    : i2m[kwargs['months'][-1]],
        'startyr'   : str(kwargs['startyr']),
        'endyr'     : str(kwargs['endyr']),
        'nbox'      : str(n_mon)
            }

    fp = EV['DATA'] + '/nipa/SST/' + DLargs['startmon'] + DLargs['startyr'] + \
        '_' + DLargs['endmon'] + DLargs['endyr'] + '_nbox_' + DLargs['nbox'] + '_version' + version

    seasonal_var = namedtuple('seasonal_var', ('data','lat','lon'))

    def _package(sstdata):
        #_Return the payload in whichever format the caller asked for
        if newFormat:
            return seasonal_var(sstdata['grid'], sstdata['lat'], sstdata['lon'])
        return sstdata

    if isfile(fp):
        #_Binary mode: pickle streams are bytes; 'with' closes the file on error
        with open(fp, 'rb') as f:
            return _package(pickle.load(f))

    print('New SST field, will save to')
    print(fp)

    #_Plain token replacement; the placeholders contain no regex syntax
    for kw in DLargs:
        SSTurl = SSTurl.replace(kw, DLargs[kw])

    dataset = open_url(SSTurl)
    sst = dataset['anom']
    time = dataset['T']

    print('Starting download...')
    grid = sst.array[:,:,:,:].squeeze()
    t = time.data[:].squeeze()
    sstlat = dataset['Y'][:]
    sstlon = dataset['X'][:]
    print('Download finished.')

    #_Grid has shape (ntim, nlat, nlon)
    #_Floor division keeps the arange step an integer on Python 2 and 3 alike
    nseasons = 12 // n_mon
    if debug:
        print('Number of seasons is %i, number of months is %i' % (nseasons, n_mon))

    #_Keep every nseasons-th time step of the box-averaged series
    ntime = len(t)
    idx = arange(0, ntime, nseasons)
    sst = grid[idx]

    sstdata = {
        'grid'  : sst,
        'lon'   : sstlon,
        'lat'   : sstlat
            }

    with open(fp, 'wb') as f:
        pickle.dump(sstdata, f)

    return _package(sstdata)
def _predictor_months(first_target_month, lag, n_mon):
    """Return the month indices of a predictor window that starts `lag`
    months before `first_target_month`; always holds at least the start."""
    start = first_target_month - lag
    return [start] + [start + i for i in range(1, n_mon)]


def _predictor_years(months, climdiv_startyr, climdiv_endyr, wraps):
    """Return (startyr, endyr) for a predictor window given the divisional
    start/end years; `wraps` is True when the divisional season runs past
    month 12, in which case climdiv_endyr is one year later."""
    first, last = months[0], months[-1]

    #_A window beginning before month 1 starts in the previous calendar year
    startyr = climdiv_startyr - 1 if first < 1 else climdiv_startyr

    if not wraps:
        endyr = climdiv_endyr - 1 if last < 1 else climdiv_endyr
    elif last < 1:
        endyr = climdiv_endyr - 2
    elif first >= 1 and last > 12:
        endyr = climdiv_endyr
    else:
        endyr = climdiv_endyr - 1
    return startyr, endyr


def create_kwgroups(debug = False, climdiv_startyr = 1895, n_yrs = 120, \
    climdiv_months = [3, 4, 5, 6], n_mon_sst = 4, sst_lag = 4, n_mon_slp = 2, \
    slp_lag = 2, n_mon_mei = 4, mei_lag = 4, filin = EV['PRCP']):
    """
    Build the keyword groups (parameters) passed to the data loading and
    OPeNDAP functions for divisional data and its SST/SLP/MEI predictors.

    Parameters
    ----------
    debug : bool
        If True, print the start/end month and year of every field.
    climdiv_startyr : int
        First year of divisional data; must be >= 1895.
    n_yrs : int
        Number of years (seasons) to cover.
    climdiv_months : list of int
        Months of the target season. Values above 12 denote months of the
        following year, e.g. [12, 13, 14, 15] for DJFM; the first must be
        >= 1 and the last <= 15.
    n_mon_sst, n_mon_slp, n_mon_mei : int
        Number of months in each predictor window.
    sst_lag, slp_lag, mei_lag : int
        How many months before climdiv_months[0] each predictor window
        starts. The resulting start month may not be below -8.
    filin : str
        Path to the divisional data file; must exist.

    Returns
    -------
    dict
        Keys 'climdiv', 'sst', 'slp' and 'mei', each mapping to a dict of
        parameters ('months', 'startyr', 'endyr', plus 'filin' for climdiv,
        'n_mon' for the predictors and 'n_year' for slp and mei).

    Raises
    ------
    AssertionError
        If any of the input checks fails.
    """
    #_Copy so the returned dict never aliases the shared default list
    climdiv_months = list(climdiv_months)

    #_Check a few things
    assert os.path.isfile(filin), 'File does not exist'
    assert climdiv_startyr >= 1895, 'Divisional data only extends to 1895'
    assert climdiv_months[0] >= 1, 'Divisonal data can only wrap to the following year'
    assert climdiv_months[-1] <= 15, 'DJFM (i.e. [12, 13, 14, 15]) is the biggest wrap allowed'

    #_Set the months of the SST, SLP and MEI fields based on the input
    #_climdiv_months and the specified lags and window lengths
    sst_months = _predictor_months(climdiv_months[0], sst_lag, n_mon_sst)
    slp_months = _predictor_months(climdiv_months[0], slp_lag, n_mon_slp)
    mei_months = _predictor_months(climdiv_months[0], mei_lag, n_mon_mei)

    assert sst_months[0] >= -8, 'sst_lag set too high, only goes to -8'
    assert slp_months[0] >= -8, 'slp_lag set too high, only goes to -8'
    assert mei_months[0] >= -8, 'mei_lag set too high, only goes to -8'

    #_A target season running past month 12 ends in the following year
    wraps = climdiv_months[-1] > 12
    if wraps:
        climdiv_endyr = climdiv_startyr + n_yrs
    else:
        climdiv_endyr = climdiv_startyr + n_yrs - 1

    sst_startyr, sst_endyr = _predictor_years(
        sst_months, climdiv_startyr, climdiv_endyr, wraps)
    mei_startyr, mei_endyr = _predictor_years(
        mei_months, climdiv_startyr, climdiv_endyr, wraps)
    slp_startyr, slp_endyr = _predictor_years(
        slp_months, climdiv_startyr, climdiv_endyr, wraps)

    if debug:
        from transform import int_to_month
        i2m = int_to_month()
        summary = (
            ('Precip', climdiv_months, climdiv_startyr, climdiv_endyr),
            ('SST', sst_months, sst_startyr, sst_endyr),
            ('SLP', slp_months, slp_startyr, slp_endyr),
            ('MEI', mei_months, mei_startyr, mei_endyr),
        )
        for label, months, startyr, endyr in summary:
            print('%s starts in %s-%d, ends in %s-%d' % \
                (label, i2m[months[0]], startyr, i2m[months[-1]], endyr))

    #_Create function output
    kwgroups = {
        'climdiv' : {
            'filin'     : filin,
            'startyr'   : climdiv_startyr,
            'endyr'     : climdiv_endyr,
            'months'    : climdiv_months
                },

        'sst' : {
            'n_mon'     : n_mon_sst,
            'months'    : sst_months,
            'startyr'   : sst_startyr,
            'endyr'     : sst_endyr
                },

        'slp' : {
            'n_mon'     : n_mon_slp,
            'months'    : slp_months,
            'startyr'   : slp_startyr,
            'endyr'     : slp_endyr,
            'n_year'    : n_yrs
                },

        'mei' : {
            'n_mon'     : n_mon_mei,
            'months'    : mei_months,
            'startyr'   : mei_startyr,
            'endyr'     : mei_endyr,
            'n_year'    : n_yrs
                }
            }

    return kwgroups
def load_slp(newFormat = False, debug = False, anomalies = True, **kwargs):
    """
    Return seasonally averaged, standardized HADSLP2r sea level pressure,
    using a local pickle cache when present and otherwise building it from
    the netCDF file.

    Parameters
    ----------
    newFormat : bool
        If True, return a ``seasonal_var(data, lat, lon)`` namedtuple.
        If False, return a dict with keys 'grid', 'lat' and 'lon'.
    debug : bool
        If True, print the selected time stamps and the indices averaged
        for each year.
    anomalies : bool
        Accepted for interface compatibility; not used by this function.
    **kwargs
        Must contain:
          'months'  - sequence of month indices; first and last entries are
                      looked up in transform.slp_tf() and int_to_month()
          'startyr' - year of the first window
          'endyr'   - last year (only used in the cache file name)
          'n_mon'   - number of consecutive months averaged per year
          'n_year'  - number of yearly windows (needed only when the cache
                      file does not exist yet)

    Raises
    ------
    ValueError
        If the requested start year/month is not in the netCDF time axis.

    Notes
    -----
    Relies on the module-level mapping ``EV`` for the data directory
    (``EV['DATA']``). The time axis is assumed to be monthly starting at
    1850-01, as hard-coded below. The cache is read with ``pickle.load``;
    only point ``EV['DATA']`` at a trusted location.
    """
    from transform import slp_tf, int_to_month
    from netCDF4 import Dataset
    from sklearn.preprocessing import scale
    from numpy import arange, zeros, where
    from os.path import isfile
    from collections import namedtuple
    import pandas as pd
    import pickle

    seasonal_var = namedtuple('seasonal_var', ('data','lat','lon'))

    def _package(slpdata):
        #_Return the payload in whichever format the caller asked for
        if newFormat:
            return seasonal_var(slpdata['grid'], slpdata['lat'], slpdata['lon'])
        return slpdata

    transform = slp_tf() #This is for transforming kwargs into DLargs

    DLargs = {
        'startmon'  : transform[kwargs['months'][0]],
        'endmon'    : transform[kwargs['months'][-1]],
        'startyr'   : str(kwargs['startyr']),
        'endyr'     : str(kwargs['endyr']),
        'nbox'      : str(kwargs['n_mon'])
            }

    i2m = int_to_month() #_Use in naming convention
    fp = EV['DATA'] + '/nipa/SLP/' + i2m[kwargs['months'][0]] + \
        DLargs['startyr'] + '_' + i2m[kwargs['months'][-1]] + \
        DLargs['endyr'] + '_nbox_' + DLargs['nbox']

    if isfile(fp):
        #_Binary mode: pickle streams are bytes; 'with' closes the file on error
        with open(fp, 'rb') as f:
            return _package(pickle.load(f))

    print('Creating new SLP pickle from netCDF file')

    n_mon = kwargs['n_mon']
    n_year = kwargs['n_year']

    #_Read everything needed from the netCDF file, then release the handle
    nc_fp = EV['DATA'] + '/netCDF/slp.mnmean.real.nc'
    dat = Dataset(nc_fp)
    try:
        ntime = len(dat.variables['time'][:])
        lat = dat.variables['lat'][:]
        lon = dat.variables['lon'][:]
        slp = dat.variables['slp'][:]
    finally:
        dat.close()

    #_Build a monthly time index matching the netCDF time axis
    extractargs = {
        'start'     : '1850-01',
        'periods'   : ntime,
        'freq'      : 'M',
            }
    timeindex = pd.date_range(**extractargs)

    #_Locate the position of the first month of the first window
    startyr = kwargs['startyr']
    startmon = int(DLargs['startmon'])
    matches = where((timeindex.year == startyr) & (timeindex.month == startmon))[0]
    if len(matches) == 0:
        raise ValueError('Start %s-%s not found in SLP time axis' % (startyr, startmon))
    idx_start = matches[0]

    #_One array of n_mon consecutive positions per year, 12 months apart
    idx = [arange(n_mon) + idx_start + 12*n for n in range(n_year)]

    if debug:
        #_Show the time stamps of the first ten yearly windows
        for mons in idx[:10]:
            print(timeindex[mons])

    nlat = len(lat)
    nlon = len(lon)

    #_Average the months of each window into one field per year
    slpavg = zeros((n_year, nlat, nlon))
    for year, mons in enumerate(idx):
        slpavg[year] = slp[mons].mean(axis=0)
        if debug:
            print('Averaging  %s' % (mons,))

    #_Standardize each grid cell's series across years
    for i in range(nlat):
        for j in range(nlon):
            slpavg[:,i,j] = scale(slpavg[:,i,j])

    slpdata = {
        'grid'  : slpavg,
        'lat'   : lat,
        'lon'   : lon
            }

    with open(fp, 'wb') as f:
        pickle.dump(slpdata, f)
    print('SLP data saved to %s' % (fp))

    return _package(slpdata)