def make_land_mask(file):
    """Build a land/sea mask on the grid of ``file`` from the reference mask.

    The reference mask is read from a fixed ``lsmask.nc`` path. Its first
    record is taken, the row order is reversed and the 0/1 coding is swapped.
    The result is handed to ``interp_landgrid`` together with the ``lon`` and
    ``lat`` arrays read from ``file``.

    Parameters
    ----------
    file
        Path of a netCDF file readable by ``ncextractall`` that provides
        ``lon`` and ``lat`` entries (the target grid).

    Returns
    -------
    The value returned by ``interp_landgrid`` for the target grid.
    """
    from netcdf_tools import ncextractall
    import numpy as np

    # Reference mask that the new mask is generated from.
    source = ncextractall('/Users/ailieg/Data/drought_model_eval_data/lsmask.nc')
    src_lon, src_lat = source['lon'], source['lat']

    # First record only, with the row axis reversed.
    # NOTE(review): the rows are reversed but src_lat is not - confirm that
    # interp_landgrid expects this combination.
    grid = source['mask'][0, :, :][::-1, :]

    # The file codes land as 0 and ocean as 1; swap them, using 3 as a
    # temporary value so the two assignments do not collide.
    for old_value, new_value in ((1, 3), (0, 1), (3, 0)):
        grid[np.where(grid == old_value)] = new_value

    # Longitude and latitude that the mask will be interpolated to.
    target = ncextractall(file)
    return interp_landgrid(src_lon, src_lat, target['lon'], target['lat'], grid)
def rcs_model(winlen, modfile):
    """Return the ``winlen``-step running mean of model precipitation.

    Reads ``pr`` from ``modfile``, multiplies it by 86400, trims the record
    with ``trim_time_jandec``, passes it through ``mmd_mmm`` and smooths it
    along the time axis with a uniform (boxcar) filter of length ``winlen``.
    The time steps at either end whose window ran off the record are dropped.

    Parameters
    ----------
    winlen : int
        Length of the running window along the time axis.
    modfile
        Path of the model netCDF file; must provide ``pr``, ``lon``, ``lat``,
        ``time`` and ``time_units`` (``time_calendar`` is optional).

    Returns
    -------
    (mdata, mlat, mlon)
        The smoothed, trimmed data and the latitude / longitude arrays.
        The original statement was ``return, mdata, mlat, mlon`` (a syntax
        error); the stray comma is removed and these three values returned.
    """
    from grid_tools import trim_time_jandec
    from netCDF4 import num2date
    from scipy import ndimage
    from netcdf_tools import ncextractall
    from convert import mmd_mmm

    # Extract the model data and clip to the required start and end months
    modnc = ncextractall(modfile)
    mdata = modnc['pr'] * 86400.  # convert to same units as obs
    mlon = modnc['lon']
    mlat = modnc['lat']
    time_u = modnc['time_units']
    if 'time_calendar' in modnc.keys():
        mtime = num2date(modnc['time'], units=time_u,
                         calendar=modnc['time_calendar'])
    else:
        mtime = num2date(modnc['time'], units=time_u)

    mdata, mtime = trim_time_jandec(mdata, mtime)
    mdata = mmd_mmm(mdata)
    # ndimage.filters is a deprecated alias namespace; call the function from
    # scipy.ndimage directly.
    mdata = ndimage.uniform_filter(mdata, size=[winlen, 1, 1])

    # Trim first and last values as they are unrepresentative. An even
    # window is trimmed by one step fewer at the end than at the start.
    head = int(winlen / 2)
    tail = head - 1 if winlen % 2 == 0 else head
    mdata = mdata[head:, :, :]
    # Slice with an explicit end index: ``[:-tail]`` would return an empty
    # array when tail == 0 (winlen of 1 or 2).
    mdata = mdata[:mdata.shape[0] - tail, :, :]
    return mdata, mlat, mlon
def statsfromgrid(file, vname):
    """Count zeros and compute the lag-1 autocorrelation at every grid cell.

    For each (lat, lon) cell of ``vname`` the time series is compared with a
    copy of itself rotated by one step (the first value wraps to the end) and
    the Pearson correlation is stored. Cells whose correlation exceeds 0.8
    are printed as ``i j r``.

    Parameters
    ----------
    file
        Path of a netCDF file readable by ``ncextractall``; must provide
        ``vname``, ``lon`` and ``lat``.
    vname : str
        Name of the variable to analyse; indexed as ``[time, lat, lon]``.

    Returns
    -------
    (numzeros, acorr)
        Flattened (lat-major) arrays with the number of exact zeros and the
        lag-1 autocorrelation for every cell.

    Notes
    -----
    The loops previously stopped at ``len - 1`` and so never filled the last
    latitude row and longitude column. The mean, standard deviation and skew
    that were computed but never returned are no longer calculated.
    """
    import numpy as np
    import scipy.stats as stats
    from netcdf_tools import ncextractall

    # Import and extract the netcdf file, returns a dictionary
    datdict = ncextractall(file)
    data = datdict[vname]
    nlat = len(datdict['lat'])
    nlon = len(datdict['lon'])

    acorr = np.zeros((nlat, nlon))
    numzeros = np.zeros((nlat, nlon))

    for i in range(nlat):
        for j in range(nlon):
            series = data[:, i, j].flatten()
            numzeros[i, j] = (series[np.where(series == 0.0)]).size
            # Rotate left by one step, same as deque.rotate(-1).
            lagged = np.roll(series, -1)
            # Compute the correlation once and reuse it.
            r = stats.pearsonr(series, lagged)[0]
            acorr[i, j] = r
            if r > 0.8:
                print(i, j, r)
    return numzeros.flatten(), acorr.flatten()
def rcs_gpcp(winlen):
    """Return the ``winlen``-step running mean of GPCP precipitation.

    Reads ``precip`` from the fixed GPCP file, reverses the latitude axis of
    both the coordinate and the data, replaces the missing value by NaN,
    trims the record with ``trim_time_jandec``, passes it through ``mmd_mmm``
    and smooths it along the time axis with a uniform (boxcar) filter of
    length ``winlen``. The time steps at either end whose window ran off the
    record are dropped.

    Parameters
    ----------
    winlen : int
        Length of the running window along the time axis.

    Returns
    -------
    (odata, olat, olon)
        The smoothed, trimmed data and the latitude / longitude arrays.
        The original statement was ``return, odata, olat, olon`` (a syntax
        error); the stray comma is removed and these three values returned.
    """
    # numpy was used below without being imported in this function.
    import numpy as np
    from grid_tools import trim_time_jandec
    from netCDF4 import num2date
    from scipy import ndimage
    from netcdf_tools import ncextractall
    from convert import mmd_mmm

    # Extract the observed data and clip to the required start and end months
    obfile = '/Users/ailieg/Data/drought_model_eval_data/data/obs/GPCP/precip.mon.mean.nc'
    obsnc = ncextractall(obfile)
    olon = obsnc['lon']
    # Reverse the latitude coordinate and the data's latitude axis together.
    olat = obsnc['lat'][::-1]
    odata = obsnc['precip'][:, ::-1, :]
    obsmiss = obsnc['precip_missing_value']
    odata[np.where(odata == obsmiss)] = np.nan

    time_u = obsnc['time_units']
    if 'time_calendar' in obsnc.keys():
        otime = num2date(obsnc['time'], units=time_u,
                         calendar=obsnc['time_calendar'])
    else:
        otime = num2date(obsnc['time'], units=time_u)

    odata, otime = trim_time_jandec(odata, otime)
    odata = mmd_mmm(odata)
    # ndimage.filters is a deprecated alias namespace; call the function from
    # scipy.ndimage directly.
    odata = ndimage.uniform_filter(odata, size=[winlen, 1, 1])

    # Trim first and last values as they are unrepresentative. An even
    # window is trimmed by one step fewer at the end than at the start.
    head = int(winlen / 2)
    tail = head - 1 if winlen % 2 == 0 else head
    odata = odata[head:, :, :]
    # Slice with an explicit end index: ``[:-tail]`` would return an empty
    # array when tail == 0 (winlen of 1 or 2).
    odata = odata[:odata.shape[0] - tail, :, :]
    return odata, olat, olon
def spi_netcdf_grid(rfile, wfile, vname, nspi):
    """Compute the SPI at every grid cell of a netCDF file and write it out.

    Parameters
    ----------
    rfile
        Path of the input netCDF file; must provide ``vname``,
        ``<vname>_missing_value``, ``lon``, ``lat``, ``time`` and
        ``time_units`` (``time_calendar`` is optional).
    wfile
        Path of the output file passed to ``ncwrite_climgrid``.
    vname : str
        Name of the input variable; indexed as ``[time, lat, lon]``.
    nspi
        Second argument passed to ``spi``; also used in the output variable
        name (``SPI<nspi>``) and long name.

    Returns
    -------
    (spigrid, lon, lat, time)
        The SPI grid (missing cells set to the input missing value), the
        coordinates and the time axis converted back to numeric form.
    """
    from grid_tools import trim_time_jandec
    from netcdf_tools import ncextractall
    from netcdf_tools import ncwrite_climgrid
    from netCDF4 import num2date
    from netCDF4 import date2num
    import numpy as np

    # Import and extract the netcdf file, returns a dictionary
    datdict = ncextractall(rfile)
    data = datdict[vname]
    lon = datdict["lon"]
    lat = datdict["lat"]
    time = datdict["time"]

    # convert missing numbers to NaN
    miss = datdict[vname + "_missing_value"]
    data = np.where(data == miss, np.nan, data)

    # convert time units to actual date
    time_u = datdict["time_units"]
    has_calendar = "time_calendar" in datdict.keys()
    if has_calendar:
        cal = datdict["time_calendar"]
        time = num2date(time, units=time_u, calendar=cal)
    else:
        time = num2date(time, units=time_u)

    # grid_tools was referenced here without being imported; import the
    # helper by name as the other functions in this file do.
    data, time = trim_time_jandec(data, time)[:2]

    # Compute the SPI at all locations
    spigrid = np.zeros(data.shape)
    for i in range(len(lat)):
        for j in range(len(lon)):
            cell = data[:, i, j].flatten()
            spigrid[:, i, j] = spi(cell, nspi).flatten()

    # convert missing numbers back to a float. NaN never compares equal to
    # anything (including itself), so ``== np.nan`` matched nothing.
    spigrid = np.where(np.isnan(spigrid), miss, spigrid)

    # convert time back to original units
    if has_calendar:
        time = date2num(time, units=time_u, calendar=cal)
    else:
        time = date2num(time, units=time_u)

    spidescrip = "The Standardised Precipitation Index (SPI) computed as per McKee et al. (1993)"
    spilong_name = str(nspi) + "-month Standardised Precipitation Index"
    spiname = "SPI" + str(nspi)
    ncwrite_climgrid(
        wfile, spigrid, spiname, spidescrip, spilong_name, miss,
        "standardised units", time, lon, lat, time_u
    )
    return spigrid, lon, lat, time
def nc_ipophase():
    """Write the 1977-1999 monthly SST climatology from HadISST.

    Reads ``sst`` from the fixed HadISST file, keeps the time steps whose
    year lies in 1977..1999, averages each calendar month over those years
    and writes the 12-step climatology with ``ncwrite_climgrid``. The time
    stamps written are those of the middle year of the selection.

    Returns
    -------
    (sst, lat, lon)
        The climatology, indexed ``[month, lat, lon]``, and the coordinates.

    Raises
    ------
    ValueError
        If the selected period is not a whole number of years.
    """
    from netCDF4 import date2num
    from netCDF4 import num2date
    import numpy as np
    from netcdf_tools import ncwrite_climgrid
    from netcdf_tools import ncextractall

    # Input file (this file goes from January 1870 to April 2016 as is)
    infile = '/Users/ailieg/Data/HadISST_sst.nc'
    nc = ncextractall(infile)
    sst = nc['sst']
    lon = nc['longitude']
    lat = nc['latitude']
    nlon = lon.size
    nlat = lat.size
    miss = nc['sst_missing_value']
    units = nc['sst_units']

    # convert time units to actual date. ``cal`` is needed again when
    # writing, so fall back to netCDF4's default calendar name when the file
    # does not define one (it was previously undefined in that case).
    time_u = nc['time_units']
    cal = nc['time_calendar'] if 'time_calendar' in nc.keys() else 'standard'
    time = num2date(nc['time'], units=time_u, calendar=cal)

    # Extract data from 1977 to 1999 only
    year = np.array([stamp.year for stamp in time])
    keep = (year > 1976) & (year < 2000)
    sst = sst[keep, :, :]
    time = time[keep]
    ntime = time.size

    # Reshape the array to determine climatology over the whole period.
    # Integer division is required: reshape rejects a float dimension.
    if ntime % 12 != 0:
        raise ValueError(
            'selected period has %d time steps, not a whole number of years'
            % ntime)
    nyear = ntime // 12
    sst = sst.reshape([nyear, 12, nlat, nlon])
    time = time.reshape([nyear, 12])
    time = time[nyear // 2, :]
    sst = np.mean(sst, axis=0)

    # Write the output file
    descrip = 'Monthly climatology of HadISST SSTs from IPO positive years computed as 1977-1999'
    long_name = 'sst'
    time = date2num(time, units=time_u, calendar=cal)
    print(sst.shape, time.size, lon.size, lat.size)
    filename = '/Users/ailieg/Data/IPO_ACCESS/HadISST_IPOpos_1977_1999.nc'
    print("Writing netCDF file...")
    ncwrite_climgrid(filename, sst, 'sst', descrip, long_name, miss, units,
                     time, lon, lat, time_u, cal)
    print("NetCDF file written")
    return sst, lat, lon
def plot_tele_corr():
    """Map the rainfall variance associated with the IOD, ENSO and SAM.

    Reads three index tables from the working directory and the GPCP monthly
    precipitation file. Columns ``5:11`` of each yearly row are averaged for
    every record (for GPCP, trimmed to start in January, these are June to
    November). At every grid cell the squared Pearson correlations between
    the precipitation series and the three index series are summed and
    multiplied by 100. The field is drawn with filled contours on a
    PlateCarree map and shown interactively. Returns nothing.
    """
    from matplotlib import pyplot as plt
    import cartopy.crs as ccrs
    from netcdf_tools import ncextractall
    import numpy as np
    from scipy import stats
    from netCDF4 import num2date

    def read_table(path):
        # One row per text line, whitespace-separated floats.
        with open(path) as handle:
            return np.array([[float(tok) for tok in row.split()]
                             for row in handle])

    # Column window averaged for every record.
    s = 5
    e = 11

    # SAM and ENSO tables: drop the first column, then average the window.
    sam = np.mean(read_table('sam_1979_2010.dat')[:, 1:][:, s:e], axis=1)
    enso = np.mean(read_table('nino34_1979_2010.dat')[:, 1:][:, s:e], axis=1)
    # IOD table: third column holds the values, one row per month.
    iod = read_table('iod_1979_2010.dat')[:, 2].reshape(31, 12)
    iod = np.mean(iod[:, s:e], axis=1)

    print("IOD:", iod.size)
    print("ENSO:", enso.size)
    print("SAM:", sam.size)

    obfile = '/Users/ailieg/Data/drought_model_eval_data/data/obs/GPCP/precip.mon.mean.nc'
    obsnc = ncextractall(obfile)
    obsp = obsnc['precip']
    lon = obsnc['lon']
    lat = obsnc['lat']
    nlon = lon.size
    nlat = lat.size
    obsmiss = obsnc['precip_missing_value']
    obsp[np.where(obsp == obsmiss)] = np.nan

    # convert time units to actual date
    time_u = obsnc['time_units']
    if 'time_calendar' in obsnc.keys():
        time = num2date(obsnc['time'], units=time_u,
                        calendar=obsnc['time_calendar'])
    else:
        time = num2date(obsnc['time'], units=time_u)

    # Clip the start so the record begins in January.
    first = 0
    while time[first].month != 1:
        first += 1
    obsp = obsp[first:, :, :]
    time = time[first:]

    # Clip the end so the record finishes in December (slice end is
    # exclusive, hence the +1).
    last = len(time) - 1
    while time[last].month != 12:
        last -= 1
    obsp = obsp[:last + 1, :, :]
    time = time[:last + 1]

    corr = np.zeros((nlat, nlon))
    for ilon in range(nlon):
        for ilat in range(nlat):
            monthly = obsp[:, ilat, ilon]
            nyears = int(monthly.size / 12)
            table = monthly.reshape([nyears, 12])
            # First 31 years only, to match the length of the IOD series.
            seasonal = np.mean(table[0:31, s:e], axis=1)
            r_iod = stats.pearsonr(iod, seasonal)[0]
            r_enso = stats.pearsonr(enso, seasonal)[0]
            r_sam = stats.pearsonr(sam, seasonal)[0]
            corr[ilat, ilon] = ((r_iod**2) + (r_enso**2) + (r_sam**2)) * 100

    ax = plt.axes(projection=ccrs.PlateCarree())
    lev = np.arange(-100., 105, 5)
    filled = plt.contourf(lon, lat, corr, levels=lev,
                          transform=ccrs.PlateCarree())
    plt.colorbar(filled)
    ax.coastlines()
    plt.show()
def plot_globrainvar():
    """Map the year-to-year variability of GPCP precipitation.

    The GPCP monthly record is clipped to whole January-December years. At
    every grid cell the 12 monthly values of each year are summed, and the
    standard deviation of those yearly sums is expressed as a percentage of
    their mean (capped at 99.9). The field is drawn with filled contours on
    a PlateCarree map and shown interactively. Returns nothing.

    Notes
    -----
    The cell loops previously started at index 1, which left the first
    latitude row and first longitude column at zero; they now cover the
    whole grid, as the other gridded loops in this file do.
    """
    from matplotlib import pyplot as plt
    import cartopy.crs as ccrs
    from netcdf_tools import ncextractall
    import numpy as np
    from netCDF4 import num2date

    obfile = '/Users/ailieg/Data/drought_model_eval_data/data/obs/GPCP/precip.mon.mean.nc'
    obsnc = ncextractall(obfile)
    obsp = obsnc['precip']
    lon = obsnc['lon']
    lat = obsnc['lat']
    nlon = lon.size
    nlat = lat.size
    obsmiss = obsnc['precip_missing_value']
    obsp[np.where(obsp == obsmiss)] = np.nan

    # convert time units to actual date
    time_u = obsnc['time_units']
    if 'time_calendar' in obsnc.keys():
        time = num2date(obsnc['time'], units=time_u,
                        calendar=obsnc['time_calendar'])
    else:
        time = num2date(obsnc['time'], units=time_u)

    # check that the data array begins in January; clip the start
    first = 0
    while time[first].month != 1:
        first += 1
    obsp = obsp[first:, :, :]
    time = time[first:]

    # check that the data array ends in December; clip the end
    # (slice end is exclusive, hence the +1)
    last = len(time) - 1
    while time[last].month != 12:
        last -= 1
    obsp = obsp[:last + 1, :, :]
    time = time[:last + 1]

    var = np.zeros((nlat, nlon))
    for i in range(nlon):
        for j in range(nlat):
            monthly = obsp[:, j, i]
            nyears = monthly.size // 12
            yearly = np.sum(monthly.reshape([nyears, 12]), axis=1)
            frac = (np.std(yearly) / np.mean(yearly)) * 100
            # Cap the percentage at 99.9.
            var[j, i] = np.where(frac > 99.9, 99.9, frac)

    ax = plt.axes(projection=ccrs.PlateCarree())
    lev = np.arange(-100., 105, 5)
    filled = plt.contourf(lon, lat, var, levels=lev,
                          transform=ccrs.PlateCarree())
    plt.colorbar(filled)
    ax.coastlines()
    plt.show()
def gpcp_corr(sone, stwo):
    """Correlate two seasonal means of GPCP precipitation at every grid cell.

    Parameters
    ----------
    sone, stwo : array-like of int
        1-based month numbers of the two seasons. A season whose first month
        is larger than its last (e.g. ``[12, 1, 2]``) is treated as wrapping
        over the year end.

    Returns
    -------
    (corr, lat, lon)
        ``corr[lat, lon]`` holds the Pearson correlation between the yearly
        series of the two seasonal means.

    Notes
    -----
    The second season previously reused the first season's indices and shift
    (``sidxtwo`` was never defined in the wrap branch), and the monthly table
    was rolled in place so the second season saw the first season's shift.
    Each season is now computed from the unshifted table with its own months.
    """
    from netcdf_tools import ncextractall
    import numpy as np
    from scipy import stats
    from netCDF4 import num2date

    def season_mean(table, months):
        """Mean over ``months`` for each row of a (year, 12) table."""
        if months[0] > months[months.size - 1]:
            # Wrapping season: roll the flattened record so the season's
            # first month lands in column 0 and the season is contiguous.
            # The last row wraps around to the start of the record.
            shifted = np.roll(table, -1 * (months[0] - 1))
            return np.mean(shifted[:, np.arange(0, months.size)], axis=1)
        return np.mean(table[:, months - 1], axis=1)

    # Accept lists/tuples as well as arrays.
    sone = np.asarray(sone)
    stwo = np.asarray(stwo)

    obfile = '/Users/ailieg/Data/drought_model_eval_data/data/obs/GPCP/precip.mon.mean.nc'
    obsnc = ncextractall(obfile)
    obsp = obsnc['precip']
    lon = obsnc['lon']
    lat = obsnc['lat']
    nlon = lon.size
    nlat = lat.size
    obsmiss = obsnc['precip_missing_value']
    obsp[np.where(obsp == obsmiss)] = np.nan

    # convert time units to actual date
    time_u = obsnc['time_units']
    if 'time_calendar' in obsnc.keys():
        time = num2date(obsnc['time'], units=time_u,
                        calendar=obsnc['time_calendar'])
    else:
        time = num2date(obsnc['time'], units=time_u)

    # check that the data array begins in January; clip the start
    first = 0
    while time[first].month != 1:
        first += 1
    obsp = obsp[first:, :, :]
    time = time[first:]

    # check that the data array ends in December; clip the end
    # (slice end is exclusive, hence the +1)
    last = len(time) - 1
    while time[last].month != 12:
        last -= 1
    obsp = obsp[:last + 1, :, :]
    time = time[:last + 1]

    # Start from the missing value so untouched cells are recognisable.
    corr = np.zeros((nlat, nlon)) + obsmiss
    for i in range(nlon):
        for j in range(nlat):
            monthly = obsp[:, j, i]
            table = monthly.reshape([monthly.size // 12, 12])
            corr[j, i] = stats.pearsonr(season_mean(table, sone),
                                        season_mean(table, stwo))[0]
    return corr, lat, lon
def synth_climseries(file, vname, nsyn):
    """Generate synthetic monthly series from per-month gamma fits.

    For every grid cell the record (clipped to whole January-December years)
    is split by calendar month. Each month with more than 10 valid values is
    fitted with a gamma distribution on its non-zero values; a synthetic
    sample with the same number of zeros and non-zeros is drawn and shuffled.
    Months with too few valid values are filled with the missing value.

    Parameters
    ----------
    file
        Path of a netCDF file readable by ``ncextractall``; must provide
        ``vname``, ``<vname>_missing_value``, ``lon``, ``lat``, ``time`` and
        ``time_units`` (``time_calendar`` is optional).
    vname : str
        Name of the variable; indexed as ``[time, lat, lon]``.
    nsyn : int
        Number of synthetic series per cell.

    Returns
    -------
    (synthseries, synthtest, data, des, lon, lat, time, time_u, miss, ac)
        ``synthtest[lat, lon, time, n]`` holds the raw synthetic series and
        ``synthseries`` the same values scaled by ``1 + ac``; ``des`` is the
        deseasonalised input and ``ac`` its lag-1 autocorrelation per cell.

    Notes
    -----
    Fixes relative to the previous version: the monthly means are tiled by
    the actual number of years instead of a hard-coded 36; all ``nsyn``
    series are generated (the last one used to stay zero); a month with too
    few values now fills its own column instead of replacing the whole
    series with a vector of the wrong length; the gamma fit is done once per
    cell rather than once per synthetic series.
    """
    import numpy as np
    import scipy.stats as stats
    from netcdf_tools import ncextractall
    from netCDF4 import num2date

    def fit_month(column):
        """Return (shape, scale, n_zero, n_nonzero), or None if too few data."""
        # remove any NaNs (i.e. missing numbers) so only real numbers exist
        valid = column[~np.isnan(column)]
        if len(valid) <= 10:
            return None
        n_zero = (valid[np.where(valid == 0.0)]).size
        # shape and scale are estimated from the non-zero data only
        wet = valid[np.where(valid > 0.0)]
        a_stat = np.log(np.mean(wet)) - (np.sum(np.log(wet)) / len(wet))
        shape = (1.0 / (4 * a_stat)) * (1 + ((1 + ((4 * a_stat) / 3))**0.5))
        scale = np.mean(wet) / shape
        return shape, scale, n_zero, wet.size

    # Import and extract the netcdf file, returns a dictionary
    datdict = ncextractall(file)
    data = datdict[vname]
    lon = datdict['lon']
    lat = datdict['lat']

    # convert time units to actual date
    time_u = datdict['time_units']
    if 'time_calendar' in datdict.keys():
        time = num2date(datdict['time'], units=time_u,
                        calendar=datdict['time_calendar'])
    else:
        time = num2date(datdict['time'], units=time_u)

    # convert missing numbers to NaN
    miss = datdict[vname + "_missing_value"]
    data = np.where(data == miss, np.nan, data)

    # check that the data array begins in January; clip the start
    first = 0
    while time[first].month != 1:
        first += 1
    data = data[first:, :, :]
    time = time[first:]

    # check that the data array ends in December; clip the end
    # (slice end is exclusive, hence the +1)
    last = len(time) - 1
    while time[last].month != 12:
        last -= 1
    data = data[:last + 1, :, :]
    time = time[:last + 1]

    # Output arrays
    synthseries = np.zeros((lat.size, lon.size, time.size, nsyn))
    synthtest = np.zeros((lat.size, lon.size, time.size, nsyn))
    des = np.zeros((time.size, lat.size, lon.size))
    ac = np.zeros((lat.size, lon.size))

    for i in range(len(lat)):
        print("Lat ", i, "of ", len(lat))
        for j in range(len(lon)):
            # Stratify the series into a (year, month) table.
            series = data[:, i, j].flatten()
            lens = len(series) // 12
            series = series.reshape([lens, 12])

            # Lag-1 autocorrelation of the deseasonalised series (monthly
            # means removed so seasonality does not inflate it).
            sm = series.mean(axis=0)
            deseas = series.flatten() - np.tile(sm, lens)
            des[:, i, j] = deseas
            ac[i, j] = stats.pearsonr(deseas, np.roll(deseas, -1))[0]

            # The fits do not depend on the synthetic-series index.
            fits = [fit_month(series[:, k]) for k in range(12)]

            # Compute NSYN number of synthetic series
            for m in range(nsyn):
                tmpsyn = np.zeros((lens, 12))
                for k, fit in enumerate(fits):
                    if fit is None:
                        tmpsyn[:, k] = miss
                        continue
                    shp, scl, numzeros, numnonzero = fit
                    synthgam = stats.gamma.rvs(shp, scale=scl,
                                               size=numnonzero)
                    if numzeros > 0:
                        synthgam = np.concatenate((np.zeros(numzeros),
                                                   synthgam))
                    np.random.shuffle(synthgam)
                    # NOTE(review): a month with some (but at most
                    # lens - 11) missing values yields fewer than ``lens``
                    # samples and this assignment raises; confirm how such
                    # months should be handled.
                    tmpsyn[:, k] = synthgam
                tmps = tmpsyn.flatten()
                synthseries[i, j, :, m] = tmps + ((ac[i, j]) * tmps)
                synthtest[i, j, :, m] = tmps
    return synthseries, synthtest, data, des, lon, lat, time, time_u, miss, ac
def synth_test(file, vname, nsyn):
    """Experimental surrogate-series generator (unfinished).

    For every grid cell the record (clipped to whole January-December years)
    is deseasonalised, its autocorrelation at lags 1..6 is computed, and a
    gamma distribution is fitted to the non-zero values of each calendar
    month. Each surrogate series is then seeded with six gamma draws using
    the first month's parameters; the remaining time steps stay zero because
    the autoregressive step is still commented out below.

    Parameters
    ----------
    file
        Path of a netCDF file readable by ``ncextractall``; must provide
        ``vname``, ``<vname>_missing_value``, ``lon``, ``lat``, ``time`` and
        ``time_units`` (``time_calendar`` is optional).
    vname : str
        Name of the variable; indexed as ``[time, lat, lon]``.
    nsyn : int
        Number of surrogate series per cell.

    Returns
    -------
    (synthseries, data, alpha, beta, zeros, ac, surgam)
        ``synthseries[lat, lon, time, n]`` plus, for the last cell
        processed, the tiled shape/scale parameters, the flattened zero
        flags (0 marks a zero month), the reversed autocorrelation function
        and the last surrogate series. The per-cell items are ``None`` when
        no cell or no series was processed.

    Notes
    -----
    The statement ``surgam = x`` referenced an undefined name and has been
    removed. The cell loops previously started at index 1 and skipped the
    first latitude row and longitude column.
    """
    import numpy as np
    import scipy.stats as stats
    from netcdf_tools import ncextractall
    from netCDF4 import num2date

    # Import and extract the netcdf file, returns a dictionary
    datdict = ncextractall(file)
    data = datdict[vname]
    lon = datdict['lon']
    lat = datdict['lat']

    # convert time units to actual date
    time_u = datdict['time_units']
    if 'time_calendar' in datdict.keys():
        time = num2date(datdict['time'], units=time_u,
                        calendar=datdict['time_calendar'])
    else:
        time = num2date(datdict['time'], units=time_u)

    # convert missing numbers to NaN
    miss = datdict[vname + "_missing_value"]
    data = np.where(data == miss, np.nan, data)

    # check that the data array begins in January; clip the start
    first = 0
    while time[first].month != 1:
        first += 1
    data = data[first:, :, :]
    time = time[first:]

    # check that the data array ends in December; clip the end
    # (slice end is exclusive, hence the +1)
    last = len(time) - 1
    while time[last].month != 12:
        last -= 1
    data = data[:last + 1, :, :]
    time = time[:last + 1]

    # Output arrays
    synthseries = np.zeros((lat.size, lon.size, time.size, nsyn))
    des = np.zeros((time.size, lat.size, lon.size))
    # Defined up front so the return statement cannot hit an unbound name.
    alpha = beta = zeros = ac = surgam = None

    for i in range(len(lat)):
        print("Lat ", i, "of ", len(lat))
        for j in range(len(lon)):
            # Stratify the series into a (year, month) table.
            series = data[:, i, j].flatten()
            lens = len(series) // 12
            series = series.reshape([lens, 12])

            # Autocorrelation of the deseasonalised series at lags 1..6.
            sm = series.mean(axis=0)
            deseas = series.flatten() - np.tile(sm, lens)
            des[:, i, j] = deseas
            ac = np.zeros(6)
            for lag in range(1, 7):
                ac[lag - 1] = stats.pearsonr(deseas,
                                             np.roll(deseas, -1 * lag))[0]
            # reverse the autocorrelation function for use later
            ac = ac[::-1]

            shp = np.zeros(12)
            scl = np.zeros(12)
            # Array of 1s; entries set to 0 mark months that should be zero.
            zeros = np.zeros((lens, 12)) + 1.0

            # Gamma parameters, one calendar month at a time.
            for k in range(12):
                tmp = series[:, k]
                # remove any NaNs so only real numbers exist
                tmp = tmp[~np.isnan(tmp)]
                if len(tmp) <= 10:
                    # Too few values: shape/scale stay 0, flags stay 1.
                    continue
                numzeros = (tmp[np.where(tmp == 0.0)]).size
                # shape and scale are estimated from non-zero data only
                nonzerotmp = tmp[np.where(tmp > 0.0)]
                A = np.log(np.mean(nonzerotmp)) - (
                    np.sum(np.log(nonzerotmp)) / len(nonzerotmp))
                shp[k] = (1.0 / (4 * A)) * (1 + ((1 + ((4 * A) / 3))**0.5))
                scl[k] = np.mean(nonzerotmp) / shp[k]
                # Scatter as many zero flags as there were zero months.
                tmpz = np.zeros(lens) + 1.0
                tmpz[0:numzeros] = 0.0
                np.random.shuffle(tmpz)
                zeros[:, k] = tmpz

            # Repeat the monthly parameters along the whole record.
            alpha = np.tile(shp, lens)
            beta = np.tile(scl, lens)
            zeros = zeros.flatten()

            # Compute NSYN number of synthetic (surrogate) series
            for m in range(nsyn):
                # Seed the first six steps with gamma noise.
                noise = stats.gamma.rvs(alpha[0], scale=beta[0], size=6)
                surgam = np.zeros((time.size))
                surgam[0:6] = noise
                # TODO: the autoregressive step is not enabled yet.
                #Compute surrogate data using form x(t) = a0 + a1(xt-1) + sigma*e1
                #for w in range(7, time.size):
                #    if zeros[w] < 1.0:
                #        surgam[w] = 0.0
                #        noise = stats.gamma.rvs(alpha[w], scale = beta[w])
                #    else:
                #        noise = stats.gamma.rvs(alpha[w], scale = beta[w])
                #    surgam[w] = noise + (np.sum(ac*surgam[w-7:w-1]))
                synthseries[i, j, :, m] = surgam
    return synthseries, data, alpha, beta, zeros, ac, surgam
def gpcp_corr(sone, stwo):
    """Correlate two seasonal means of GPCP precipitation at every grid cell.

    Parameters
    ----------
    sone, stwo : array-like of int
        1-based month numbers of the two seasons. A season whose first month
        is larger than its last (e.g. ``[12, 1, 2]``) is treated as wrapping
        over the year end.

    Returns
    -------
    (corr, lat, lon)
        ``corr[lat, lon]`` holds the Pearson correlation between the yearly
        series of the two seasonal means.

    Notes
    -----
    The second season previously reused the first season's indices and shift
    (``sidxtwo`` was never defined in the wrap branch), and the monthly table
    was rolled in place so the second season saw the first season's shift.
    Each season is now computed from the unshifted table with its own months.
    """
    from netcdf_tools import ncextractall
    import numpy as np
    from scipy import stats
    from netCDF4 import num2date

    def season_mean(table, months):
        """Mean over ``months`` for each row of a (year, 12) table."""
        if months[0] > months[months.size - 1]:
            # Wrapping season: roll the flattened record so the season's
            # first month lands in column 0 and the season is contiguous.
            # The last row wraps around to the start of the record.
            shifted = np.roll(table, -1 * (months[0] - 1))
            return np.mean(shifted[:, np.arange(0, months.size)], axis=1)
        return np.mean(table[:, months - 1], axis=1)

    # Accept lists/tuples as well as arrays.
    sone = np.asarray(sone)
    stwo = np.asarray(stwo)

    obfile = '/Users/ailieg/Data/drought_model_eval_data/data/obs/GPCP/precip.mon.mean.nc'
    obsnc = ncextractall(obfile)
    obsp = obsnc['precip']
    lon = obsnc['lon']
    lat = obsnc['lat']
    nlon = lon.size
    nlat = lat.size
    obsmiss = obsnc['precip_missing_value']
    obsp[np.where(obsp == obsmiss)] = np.nan

    # convert time units to actual date
    time_u = obsnc['time_units']
    if 'time_calendar' in obsnc.keys():
        time = num2date(obsnc['time'], units=time_u,
                        calendar=obsnc['time_calendar'])
    else:
        time = num2date(obsnc['time'], units=time_u)

    # check that the data array begins in January; clip the start
    first = 0
    while time[first].month != 1:
        first += 1
    obsp = obsp[first:, :, :]
    time = time[first:]

    # check that the data array ends in December; clip the end
    # (slice end is exclusive, hence the +1)
    last = len(time) - 1
    while time[last].month != 12:
        last -= 1
    obsp = obsp[:last + 1, :, :]
    time = time[:last + 1]

    # Start from the missing value so untouched cells are recognisable.
    corr = np.zeros((nlat, nlon)) + obsmiss
    for i in range(nlon):
        for j in range(nlat):
            monthly = obsp[:, j, i]
            table = monthly.reshape([monthly.size // 12, 12])
            corr[j, i] = stats.pearsonr(season_mean(table, sone),
                                        season_mean(table, stwo))[0]
    return corr, lat, lon
def synth_climseries(file, vname, nsyn):
    """Generate synthetic monthly series from per-month gamma fits.

    For every grid cell the record (clipped to whole January-December years)
    is split by calendar month. Each month with more than 10 valid values is
    fitted with a gamma distribution on its non-zero values; a synthetic
    sample with the same number of zeros and non-zeros is drawn and shuffled.
    Months with too few valid values are filled with the missing value.

    Parameters
    ----------
    file
        Path of a netCDF file readable by ``ncextractall``; must provide
        ``vname``, ``<vname>_missing_value``, ``lon``, ``lat``, ``time`` and
        ``time_units`` (``time_calendar`` is optional).
    vname : str
        Name of the variable; indexed as ``[time, lat, lon]``.
    nsyn : int
        Number of synthetic series per cell.

    Returns
    -------
    (synthseries, synthtest, data, des, lon, lat, time, time_u, miss, ac)
        ``synthtest[lat, lon, time, n]`` holds the raw synthetic series and
        ``synthseries`` the same values scaled by ``1 + ac``; ``des`` is the
        deseasonalised input and ``ac`` its lag-1 autocorrelation per cell.

    Notes
    -----
    Fixes relative to the previous version: the monthly means are tiled by
    the actual number of years instead of a hard-coded 36; all ``nsyn``
    series are generated (the last one used to stay zero); a month with too
    few values now fills its own column instead of replacing the whole
    series with a vector of the wrong length; the gamma fit is done once per
    cell rather than once per synthetic series.
    """
    import numpy as np
    import scipy.stats as stats
    from netcdf_tools import ncextractall
    from netCDF4 import num2date

    def fit_month(column):
        """Return (shape, scale, n_zero, n_nonzero), or None if too few data."""
        # remove any NaNs (i.e. missing numbers) so only real numbers exist
        valid = column[~np.isnan(column)]
        if len(valid) <= 10:
            return None
        n_zero = (valid[np.where(valid == 0.0)]).size
        # shape and scale are estimated from the non-zero data only
        wet = valid[np.where(valid > 0.0)]
        a_stat = np.log(np.mean(wet)) - (np.sum(np.log(wet)) / len(wet))
        shape = (1.0 / (4 * a_stat)) * (1 + ((1 + ((4 * a_stat) / 3))**0.5))
        scale = np.mean(wet) / shape
        return shape, scale, n_zero, wet.size

    # Import and extract the netcdf file, returns a dictionary
    datdict = ncextractall(file)
    data = datdict[vname]
    lon = datdict['lon']
    lat = datdict['lat']

    # convert time units to actual date
    time_u = datdict['time_units']
    if 'time_calendar' in datdict.keys():
        time = num2date(datdict['time'], units=time_u,
                        calendar=datdict['time_calendar'])
    else:
        time = num2date(datdict['time'], units=time_u)

    # convert missing numbers to NaN
    miss = datdict[vname + "_missing_value"]
    data = np.where(data == miss, np.nan, data)

    # check that the data array begins in January; clip the start
    first = 0
    while time[first].month != 1:
        first += 1
    data = data[first:, :, :]
    time = time[first:]

    # check that the data array ends in December; clip the end
    # (slice end is exclusive, hence the +1)
    last = len(time) - 1
    while time[last].month != 12:
        last -= 1
    data = data[:last + 1, :, :]
    time = time[:last + 1]

    # Output arrays
    synthseries = np.zeros((lat.size, lon.size, time.size, nsyn))
    synthtest = np.zeros((lat.size, lon.size, time.size, nsyn))
    des = np.zeros((time.size, lat.size, lon.size))
    ac = np.zeros((lat.size, lon.size))

    for i in range(len(lat)):
        print("Lat ", i, "of ", len(lat))
        for j in range(len(lon)):
            # Stratify the series into a (year, month) table.
            series = data[:, i, j].flatten()
            lens = len(series) // 12
            series = series.reshape([lens, 12])

            # Lag-1 autocorrelation of the deseasonalised series (monthly
            # means removed so seasonality does not inflate it).
            sm = series.mean(axis=0)
            deseas = series.flatten() - np.tile(sm, lens)
            des[:, i, j] = deseas
            ac[i, j] = stats.pearsonr(deseas, np.roll(deseas, -1))[0]

            # The fits do not depend on the synthetic-series index.
            fits = [fit_month(series[:, k]) for k in range(12)]

            # Compute NSYN number of synthetic series
            for m in range(nsyn):
                tmpsyn = np.zeros((lens, 12))
                for k, fit in enumerate(fits):
                    if fit is None:
                        tmpsyn[:, k] = miss
                        continue
                    shp, scl, numzeros, numnonzero = fit
                    synthgam = stats.gamma.rvs(shp, scale=scl,
                                               size=numnonzero)
                    if numzeros > 0:
                        synthgam = np.concatenate((np.zeros(numzeros),
                                                   synthgam))
                    np.random.shuffle(synthgam)
                    # NOTE(review): a month with some (but at most
                    # lens - 11) missing values yields fewer than ``lens``
                    # samples and this assignment raises; confirm how such
                    # months should be handled.
                    tmpsyn[:, k] = synthgam
                tmps = tmpsyn.flatten()
                synthseries[i, j, :, m] = tmps + ((ac[i, j]) * tmps)
                synthtest[i, j, :, m] = tmps
    return synthseries, synthtest, data, des, lon, lat, time, time_u, miss, ac
def synth_test(file, vname, nsyn):
    """Experimental surrogate-series generator (unfinished).

    For every grid cell the record (clipped to whole January-December years)
    is deseasonalised, its autocorrelation at lags 1..6 is computed, and a
    gamma distribution is fitted to the non-zero values of each calendar
    month. Each surrogate series is then seeded with six gamma draws using
    the first month's parameters; the remaining time steps stay zero because
    the autoregressive step is still commented out below.

    Parameters
    ----------
    file
        Path of a netCDF file readable by ``ncextractall``; must provide
        ``vname``, ``<vname>_missing_value``, ``lon``, ``lat``, ``time`` and
        ``time_units`` (``time_calendar`` is optional).
    vname : str
        Name of the variable; indexed as ``[time, lat, lon]``.
    nsyn : int
        Number of surrogate series per cell.

    Returns
    -------
    (synthseries, data, alpha, beta, zeros, ac, surgam)
        ``synthseries[lat, lon, time, n]`` plus, for the last cell
        processed, the tiled shape/scale parameters, the flattened zero
        flags (0 marks a zero month), the reversed autocorrelation function
        and the last surrogate series. The per-cell items are ``None`` when
        no cell or no series was processed.

    Notes
    -----
    The statement ``surgam = x`` referenced an undefined name and has been
    removed. The cell loops previously started at index 1 and skipped the
    first latitude row and longitude column.
    """
    import numpy as np
    import scipy.stats as stats
    from netcdf_tools import ncextractall
    from netCDF4 import num2date

    # Import and extract the netcdf file, returns a dictionary
    datdict = ncextractall(file)
    data = datdict[vname]
    lon = datdict['lon']
    lat = datdict['lat']

    # convert time units to actual date
    time_u = datdict['time_units']
    if 'time_calendar' in datdict.keys():
        time = num2date(datdict['time'], units=time_u,
                        calendar=datdict['time_calendar'])
    else:
        time = num2date(datdict['time'], units=time_u)

    # convert missing numbers to NaN
    miss = datdict[vname + "_missing_value"]
    data = np.where(data == miss, np.nan, data)

    # check that the data array begins in January; clip the start
    first = 0
    while time[first].month != 1:
        first += 1
    data = data[first:, :, :]
    time = time[first:]

    # check that the data array ends in December; clip the end
    # (slice end is exclusive, hence the +1)
    last = len(time) - 1
    while time[last].month != 12:
        last -= 1
    data = data[:last + 1, :, :]
    time = time[:last + 1]

    # Output arrays
    synthseries = np.zeros((lat.size, lon.size, time.size, nsyn))
    des = np.zeros((time.size, lat.size, lon.size))
    # Defined up front so the return statement cannot hit an unbound name.
    alpha = beta = zeros = ac = surgam = None

    for i in range(len(lat)):
        print("Lat ", i, "of ", len(lat))
        for j in range(len(lon)):
            # Stratify the series into a (year, month) table.
            series = data[:, i, j].flatten()
            lens = len(series) // 12
            series = series.reshape([lens, 12])

            # Autocorrelation of the deseasonalised series at lags 1..6.
            sm = series.mean(axis=0)
            deseas = series.flatten() - np.tile(sm, lens)
            des[:, i, j] = deseas
            ac = np.zeros(6)
            for lag in range(1, 7):
                ac[lag - 1] = stats.pearsonr(deseas,
                                             np.roll(deseas, -1 * lag))[0]
            # reverse the autocorrelation function for use later
            ac = ac[::-1]

            shp = np.zeros(12)
            scl = np.zeros(12)
            # Array of 1s; entries set to 0 mark months that should be zero.
            zeros = np.zeros((lens, 12)) + 1.0

            # Gamma parameters, one calendar month at a time.
            for k in range(12):
                tmp = series[:, k]
                # remove any NaNs so only real numbers exist
                tmp = tmp[~np.isnan(tmp)]
                if len(tmp) <= 10:
                    # Too few values: shape/scale stay 0, flags stay 1.
                    continue
                numzeros = (tmp[np.where(tmp == 0.0)]).size
                # shape and scale are estimated from non-zero data only
                nonzerotmp = tmp[np.where(tmp > 0.0)]
                A = np.log(np.mean(nonzerotmp)) - (
                    np.sum(np.log(nonzerotmp)) / len(nonzerotmp))
                shp[k] = (1.0 / (4 * A)) * (1 + ((1 + ((4 * A) / 3))**0.5))
                scl[k] = np.mean(nonzerotmp) / shp[k]
                # Scatter as many zero flags as there were zero months.
                tmpz = np.zeros(lens) + 1.0
                tmpz[0:numzeros] = 0.0
                np.random.shuffle(tmpz)
                zeros[:, k] = tmpz

            # Repeat the monthly parameters along the whole record.
            alpha = np.tile(shp, lens)
            beta = np.tile(scl, lens)
            zeros = zeros.flatten()

            # Compute NSYN number of synthetic (surrogate) series
            for m in range(nsyn):
                # Seed the first six steps with gamma noise.
                noise = stats.gamma.rvs(alpha[0], scale=beta[0], size=6)
                surgam = np.zeros((time.size))
                surgam[0:6] = noise
                # TODO: the autoregressive step is not enabled yet.
                #Compute surrogate data using form x(t) = a0 + a1(xt-1) + sigma*e1
                #for w in range(7, time.size):
                #    if zeros[w] < 1.0:
                #        surgam[w] = 0.0
                #        noise = stats.gamma.rvs(alpha[w], scale = beta[w])
                #    else:
                #        noise = stats.gamma.rvs(alpha[w], scale = beta[w])
                #    surgam[w] = noise + (np.sum(ac*surgam[w-7:w-1]))
                synthseries[i, j, :, m] = surgam
    return synthseries, data, alpha, beta, zeros, ac, surgam
def plot_mmm_bootstrap(pc, wl, s):
    """Plot the multi-model mean of the per-model ``sig`` fields.

    ``perc_compare_bsoblen`` is called once per CMIP5 model file; the second
    value it returns (``sig``) is clipped below at zero and the clipped
    fields are averaged across models. The mean is passed through
    ``fold_grid``, drawn with ``pcolormesh`` on a PlateCarree map and saved
    as a PNG in the analysis directory.

    Parameters
    ----------
    pc
        Passed to ``perc_compare_bsoblen``; appears in the title as
        ``<pc>th%ile``.
    wl : int
        Passed to ``perc_compare_bsoblen``; used as the window length in
        months when building the title.
    s : int
        Passed to ``perc_compare_bsoblen``; used as the 1-based start month
        when building the title.

    Returns
    -------
    None

    Notes
    -----
    The plot previously drew ``d`` (the first value returned for the last
    model only) rather than the multi-model mean ``sumsig`` that had just
    been computed and folded together with ``lat``/``lon``.
    """
    import numpy as np
    from netcdf_tools import ncextractall
    from matplotlib import pyplot as plt
    import cartopy.crs as ccrs
    from matplotlib.colors import BoundaryNorm
    from grid_tools import fold_grid

    ofile = '/Users/ailieg/Data/drought_model_eval_data/data/obs/GPCP/precip.mon.mean.nc'

    cmiptitle = ['ACCESS1-0_historical_r1i1p1',
                 'CanESM2_historical_r1i1p1',
                 'GFDL-CM3_historical_r1i1p1',
                 'HadGEM2-CC_historical_r1i1p1',
                 'MPI-ESM-P_historical_r1i1p1',
                 'CCSM4_historical_r1i1p1',
                 'FGOALS-s2_historical_r1i1p1',
                 'GISS-E2-R_historical_r6i1p1',
                 'NorESM1-M_historical_r1i1p1',
                 'IPSL-CM5B-LR_historical_r1i1p1']

    # AMIP runs are kept for switching experiments by hand; unused below.
    amiptitle = ['FGOALS-s2_amip_r1i1p1',
                 'GFDL-CM3_amip_r1i1p1',
                 'HadGEM2-A_amip_r1i1p1',
                 'NorESM1-M_amip_r1i1p1']

    cmipfile = ['CMIP5/ACCESS1-0/r1i1p1/pr/pr_Amon_ACCESS1-0_historical_r1i1p1_185001-200512.nc',
                'CMIP5/CanESM2/r1i1p1/pr/pr_Amon_CanESM2_historical_r1i1p1_185001-200512.nc',
                'CMIP5/GFDL-CM3/r1i1p1/pr/pr_Amon_GFDL-CM3_historical_r1i1p1_186001-200512.nc',
                'CMIP5/HadGEM2-CC/r1i1p1/pr/pr_Amon_HadGEM2-CC_historical_r1i1p1_185912-200511.nc',
                'CMIP5/MPI-ESM-P/r1i1p1/pr/pr_Amon_MPI-ESM-P_historical_r1i1p1_185001-200512.nc',
                'CMIP5/CCSM4/r1i1p1/pr/pr_Amon_CCSM4_historical_r1i1p1_185001-200512.nc',
                'CMIP5/FGOALS-s2/r1i1p1/pr/pr_Amon_FGOALS-s2_historical_r1i1p1_185001-200512.nc',
                'CMIP5/GISS-E2-R/r6i1p1/pr/pr_Amon_GISS-E2-R_historical_r6i1p1_185001-200512.nc',
                'CMIP5/NorESM1-M/r1i1p1/pr/pr_Amon_NorESM1-M_historical_r1i1p1_185001-200512.nc',
                'CMIP5/IPSL-CM5B-LR/r1i1p1/pr/pr_Amon_IPSL-CM5B-LR_historical_r1i1p1_185001-200512.nc']

    amipfile = ['AMIP/FGOALS-s2/r1i1p1/pr/pr_Amon_FGOALS-s2_amip_r1i1p1_197901-200812.nc',
                'AMIP/GFDL-CM3/r1i1p1/pr/pr_Amon_GFDL-CM3_amip_r1i1p1_197901_200812.nc',
                'AMIP/HadGEM2-A/r1i1p1/pr/pr_Amon_HadGEM2-A_amip_r1i1p1_197809-200811.nc',
                'AMIP/NorESM1-M/r1i1p1/pr/pr_Amon_NorESM1-M_amip_r1i1p1_197901-200512.nc']

    modfile = cmipfile
    intitle = cmiptitle
    modpath = '/Users/ailieg/Data/drought_model_eval_data/data/'

    # The observation grid sizes the per-model stack.
    obsnc = ncextractall(ofile)
    lon = obsnc['lon']
    lat = obsnc['lat']
    sigmod = np.zeros((len(modfile), lat.size, lon.size))

    for idx, (relpath, label) in enumerate(zip(modfile, intitle)):
        d, sig, lat, lon = perc_compare_bsoblen(pc, wl, s, ofile,
                                                modpath + relpath, label)
        sig[sig < 0.0] = 0.0
        sigmod[idx, :, :] = sig
    sumsig = np.sum(sigmod, axis=0) / len(modfile)

    # Set levels and colormap
    levels = np.arange(0, 100, 10)
    cmap = plt.get_cmap('Spectral')
    norm = BoundaryNorm(levels, ncolors=cmap.N, clip=True)

    # Make the grid circular
    sumsig, lat, lon = fold_grid(sumsig, lat, lon)

    # Set axes and plot the multi-model mean on the folded grid
    ax = plt.axes(projection=ccrs.PlateCarree())
    mesh = plt.pcolormesh(lon, lat, sumsig, cmap=cmap, norm=norm)

    # Add a colorbar
    cbar = plt.colorbar(mesh, extend='both')
    cbar.ax.set_ylabel('%')
    ax.coastlines()

    # Create title for saved plot. The month names are repeated three times
    # so a window that runs past December can still be indexed.
    seasname = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] * 3
    title = 'PERC_MODELS_CMIP_'+str(wl)+'mth_'+seasname[s-1]+seasname[s+wl-2]+'_'+str(pc)+'th%ile'
    plt.title(title, fontsize=10)

    # Save the output
    savefile = title+'.png'
    outfile = '/Users/ailieg/Data/drought_model_eval_data/analysis/'+savefile
    plt.savefig(outfile, dpi=400, format='png', bbox_inches='tight')
    plt.close()