Example #1
def make_land_mask(file): 
	from netcdf_tools import ncextractall
	import numpy as np
	
	#Read in file from which new mask will be generated
	maskdat = ncextractall('/Users/ailieg/Data/drought_model_eval_data/lsmask.nc')
	mlon = maskdat['lon']
	mlat = maskdat['lat']
	mask = maskdat['mask']
	mask = mask[0,:,:]
	mask = mask[::-1,:]
	
	#In this mask land is zero and ocean is one; flip the convention so land is one and ocean is zero
	mask[np.where(mask == 1)] = 3
	mask[np.where(mask == 0)] = 1
	mask[np.where(mask == 3)] = 0
	
	
	#Read in file containing the longitude and latitude to which the mask will be interpolated
	dat = ncextractall(file)
	lon = dat['lon']
	lat = dat['lat']
	
	#Interpolate the mask to the new grid (see the interp_landgrid sketch after this function)
	newmask = interp_landgrid(mlon, mlat, lon, lat, mask)
	
	return newmask
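interp_landgrid is not shown in these examples; below is a minimal sketch of what it is assumed to do, nearest-neighbour regridding of the 0/1 mask onto the target grid. The project's real helper may interpolate differently.

import numpy as np

def interp_landgrid(mlon, mlat, lon, lat, mask):
    #Hypothetical sketch: regrid a 0/1 land mask onto a new lon/lat grid
    #by nearest-neighbour lookup of the closest source coordinates.
    ilat = np.abs(mlat[:, None] - lat[None, :]).argmin(axis=0)
    ilon = np.abs(mlon[:, None] - lon[None, :]).argmin(axis=0)
    return mask[np.ix_(ilat, ilon)]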
def rcs_model(winlen, modfile):
	from grid_tools import trim_time_jandec
	from netCDF4 import num2date
	from netCDF4 import date2num
	from scipy import ndimage
	from netcdf_tools import ncextractall
	from convert import mmd_mmm

	#Extract the model data and clip to the required start and end months
	modnc = ncextractall(modfile)
	mdata = modnc['pr']
	mdata = mdata*86400. #convert from kg m-2 s-1 to mm/day, the same units as the obs
	mlon = modnc['lon']
	mlat = modnc['lat']
	mtime = modnc['time']
	
	time_u = modnc['time_units']
	if 'time_calendar' in modnc.keys(): 
	     cal = modnc['time_calendar']
	     mtime = num2date(mtime,units = time_u, calendar=cal)
	else: mtime = num2date(mtime,units = time_u)
	
	mdata, mtime = trim_time_jandec(mdata, mtime)
	
	mdata = mmd_mmm(mdata)
	mdata = ndimage.uniform_filter(mdata,size=[winlen,1,1])
	
	#Trim the leading and trailing values, which are unrepresentative of a full window
	trim = winlen//2
	mdata = mdata[trim:,:,:]
	if winlen % 2 == 0: trim = trim - 1
	if trim > 0: mdata = mdata[:-trim,:,:]
	
	return mdata, mlat, mlon
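grid_tools.trim_time_jandec is used here and in several later examples but never shown; a sketch consistent with the inline January/December clipping logic that appears further down:

def trim_time_jandec(data, time):
    #Sketch matching the inline clipping in the later examples: cut a
    #monthly (time, lat, lon) array so it starts in January and ends in
    #December; `time` is a sequence of datetime-like objects.
    s = 0
    while time[s].month != 1:
        s = s + 1
    e = len(time) - 1
    while time[e].month != 12:
        e = e - 1
    return data[s:e + 1, :, :], time[s:e + 1]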
Example #3
def statsfromgrid(file, vname):
	import numpy as np
	import scipy.stats as stats
	from netcdf_tools import ncextractall
	import collections

	#Import and extract the netcdf file, returns a dictionary
	datdict = ncextractall(file)
	
	#Read each variable
	data = datdict[vname]
	lon = datdict['lon']
	lat = datdict['lat']
	time = datdict['time']
	
	#Create empty arrays for output
	mean = np.zeros((len(lat), len(lon)))
	stdev = np.zeros((len(lat), len(lon)))
	skew = np.zeros((len(lat), len(lon)))
	acorr = np.zeros((len(lat), len(lon)))
	numzeros = np.zeros((len(lat), len(lon)))
	
	#Loop over lon and lat monthly data and record the statistics
	for i in range(0,len(lat)):
	    for j in range(0,len(lon)):
	         tmpdata = data[:,i,j]
	         tmpdata = tmpdata.flatten()
	         mean[i,j] = np.mean(tmpdata)
	         stdev[i,j] = np.std(tmpdata) 
	         skew[i,j] = stats.skew(tmpdata)
	         numzeros[i,j] = (tmpdata[np.where(tmpdata == 0.0)]).size
	         
	         tmp = collections.deque(tmpdata)
	         tmp.rotate(-1)
	         tmp = np.asarray(tmp)
	         acorr[i,j] = stats.pearsonr(tmpdata,tmp)[0]
	         if acorr[i,j] > 0.8: print(i,j,acorr[i,j]) #flag strongly autocorrelated cells


	return numzeros.flatten(), acorr.flatten()
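The deque rotate above builds a wrapped lag-1 copy of the series; an equivalent pure-NumPy formulation of the same lag-1 autocorrelation:

import numpy as np
from scipy import stats

def lag1_autocorr(x):
    #Same computation as the deque-based version: correlate the series
    #with itself shifted left by one step (the final pair wraps around).
    return stats.pearsonr(x, np.roll(x, -1))[0]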
def rcs_gpcp(winlen):
	from grid_tools import trim_time_jandec
	from netCDF4 import num2date
	from netCDF4 import date2num
	from scipy import ndimage
	from netcdf_tools import ncextractall
	from convert import mmd_mmm
	import numpy as np
	
	#Extract the observed data and clip to the required start and end months
	obfile = '/Users/ailieg/Data/drought_model_eval_data/data/obs/GPCP/precip.mon.mean.nc'
	obsnc = ncextractall(obfile)
	odata = obsnc['precip']
	olon = obsnc['lon']
	olat = obsnc['lat']
	olat = olat[::-1]
	odata = odata[:,::-1,:]
	otime = obsnc['time']
	obsmiss = obsnc['precip_missing_value']
	odata[np.where(odata == obsmiss)] = np.nan
	
	time_u = obsnc['time_units']
	if 'time_calendar' in obsnc.keys(): 
	     cal = obsnc['time_calendar']
	     otime = num2date(otime,units = time_u, calendar=cal)
	else: otime = num2date(otime,units = time_u)
	
	odata, otime = trim_time_jandec(odata, otime)

	odata = mmd_mmm(odata)
	odata = ndimage.uniform_filter(odata,size=[winlen,1,1])
	
	#Trim the leading and trailing values, which are unrepresentative of a full window
	trim = winlen//2
	odata = odata[trim:,:,:]
	if winlen % 2 == 0: trim = trim - 1
	if trim > 0: odata = odata[:-trim,:,:]

	return odata, olat, olon
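convert.mmd_mmm is not shown; from its name and use it is assumed to convert monthly means in mm/day to monthly totals in mm/month. A rough sketch under that assumption, ignoring leap years:

import numpy as np

def mmd_mmm(data):
    #Hypothetical sketch: scale mm/day monthly means by the days in each
    #month, assuming the (time, lat, lon) series starts in January and
    #ignoring leap years; the real convert.mmd_mmm may handle calendars.
    days = np.array([31., 28., 31., 30., 31., 30., 31., 31., 30., 31., 30., 31.])
    factor = np.resize(days, data.shape[0])  #repeat Jan-Dec across the record
    return data * factor[:, None, None]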
def spi_netcdf_grid(rfile, wfile, vname, nspi):
    from netcdf_tools import ncextractall
    from netcdf_tools import ncwrite_climgrid
    from netCDF4 import num2date
    from netCDF4 import date2num
    import numpy as np
    import grid_tools  #used below for trim_time_jandec

    # Import and extract the netcdf file, returns a dictionary
    datdict = ncextractall(rfile)

    # Read each variable
    data = datdict[vname]
    lon = datdict["lon"]
    lat = datdict["lat"]
    time = datdict["time"]

    # convert missing numbers to NaN
    miss = datdict[vname + "_missing_value"]
    data = np.where(data == miss, np.nan, data)

    # convert time units to actual date
    time_u = datdict["time_units"]
    if "time_calendar" in datdict.keys():
        cal = datdict["time_calendar"]
        time = num2date(time, units=time_u, calendar=cal)
    else:
        time = num2date(time, units=time_u)

    data, time = grid_tools.trim_time_jandec(data, time)

    # Create an empty array to store the SPI data
    spigrid = np.zeros(data.shape)

    # Compute the SPI at all locations
    for i in range(0, len(lat)):
        for j in range(0, len(lon)):
            tmpdata = data[:, i, j]
            tmpdata = tmpdata.flatten()
            tmpspi = spi(tmpdata, nspi)  #spi() is assumed to be defined elsewhere in this module
            tmpspi = tmpspi.flatten()
            spigrid[:, i, j] = tmpspi

    # convert NaNs back to the missing value
    spigrid = np.where(np.isnan(spigrid), miss, spigrid)

    # convert time back to original units
    if "time_calendar" in datdict.keys():
        cal = datdict["time_calendar"]
        time = date2num(time, units=time_u, calendar=cal)
    else:
        time = date2num(time, units=time_u)

    spidescrip = "The Standardised Precipitation Index (SPI) computed as per McKee et al. (1993)"
    spilong_name = str(nspi) + "-month Standardised Precipitation Index"
    spiname = "SPI" + str(nspi)

    write = ncwrite_climgrid(
        wfile, spigrid, spiname, spidescrip, spilong_name, miss, "standardised units", time, lon, lat, time_u
    )

    return spigrid, lon, lat, time
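The spi() helper is assumed to implement the McKee et al. (1993) procedure named in the description string; a compact, hedged sketch of that procedure:

import numpy as np
from scipy import stats

def spi_sketch(precip, nspi):
    #Sketch of an SPI computation (McKee et al. 1993): accumulate rainfall
    #over nspi months, fit a gamma distribution per calendar month (with a
    #point mass at zero), and convert cumulative probabilities to
    #standard-normal z-scores. The real spi() may differ in detail.
    n = precip.size
    acc = np.full(n, np.nan)
    for t in range(nspi - 1, n):
        acc[t] = np.sum(precip[t - nspi + 1:t + 1])
    out = np.full(n, np.nan)
    for k in range(12):
        idx = np.arange(k, n, 12)
        v = acc[idx][np.isfinite(acc[idx])]
        nz = v[v > 0]
        if nz.size < 10:
            continue
        q = np.sum(v == 0) / v.size                #probability of a zero month
        shp, _, scl = stats.gamma.fit(nz, floc=0)  #fit the non-zero values
        cdf = q + (1.0 - q) * stats.gamma.cdf(acc[idx], shp, scale=scl)
        out[idx] = stats.norm.ppf(cdf)             #standardise
    return out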
Example #6
def nc_ipophase():
	from netCDF4 import date2num
	from netCDF4 import num2date
	import numpy as np
	from netcdf_tools import ncwrite_climgrid
	from netcdf_tools import ncextractall

	#Input file (this file goes from January 1870 to April 2016 as is)
	file = '/Users/ailieg/Data/HadISST_sst.nc'
	
	nc = ncextractall(file)
	sst = nc['sst']
	lon = nc['longitude']
	nlon = lon.size
	lat = nc['latitude']
	nlat = lat.size
	time = nc['time']
	miss = nc['sst_missing_value']
	units = nc['sst_units']
	
	#convert time units to actual date
	time_u = nc['time_units']
	if 'time_calendar' in nc.keys():
	     cal = nc['time_calendar']
	else: cal = 'standard' #default calendar so date2num and ncwrite_climgrid below always have one
	time = num2date(time,units = time_u, calendar=cal)
	
	#extract years and months from the datetime array
	ntime = time.size
	year = np.zeros(ntime)
	month = np.zeros(ntime)
	i = 0
	while (i < ntime):
		year[i] = time[i].year
		month[i] = time[i].month
		i=i+1
		
	#Extract data from 1977 to 1999 only (IPO positive years)
	sst = sst[(year > 1976) & (year < 2000), :,:]
	time = time[(year > 1976) & (year < 2000)]
	ntime = time.size
	
	#Reshape the array to determine climatology over the whole period
	nyear = ntime//12 #integer division so the reshape receives an int
	sst = sst.reshape([nyear,12,nlat,nlon])
	time = time.reshape([nyear, 12])
	mid = int(nyear/2)
	time = time[mid,:]
	
	sst = np.mean(sst, axis=0)
	
	#Write the output file
	descrip = 'Monthly climatology of HadISST SSTs from IPO positive years computed as 1977-1999'
	long_name = 'sst'
	missing = miss
	climunits = units
	time = date2num(time,units = time_u, calendar=cal)
	print(sst.shape,time.size,lon.size,lat.size)
	filename = '/Users/ailieg/Data/IPO_ACCESS/HadISST_IPOpos_1977_1999.nc'
	
	print("Writing netCDF file...")
	ncw = ncwrite_climgrid(filename, sst, 'sst', descrip, long_name, missing, climunits, time, lon, lat, time_u, cal)
	print("NetCDF file written")
	
	return sst, lat, lon
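The while loop that pulls years and months out of the datetime array can also be written more compactly; an equivalent helper:

import numpy as np

def years_months(time):
    #Equivalent to the while loop above, for any sequence of datetimes.
    year = np.array([t.year for t in time])
    month = np.array([t.month for t in time])
    return year, month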
Example #7
def plot_tele_corr():
	from matplotlib import pyplot as plt
	import cartopy.crs as ccrs
	from netcdf_tools import ncextractall
	import numpy as np
	from scipy import stats
	from netCDF4 import num2date

	
	with open('sam_1979_2010.dat') as file:
		sam = [[float(digit) for digit in line.split()] for line in file]
	s = 5
	e = 11
	sam = np.array(sam)
	sam = sam[:,1:]
	sam = sam[:,s:e]
	sam = np.mean(sam,axis=1)
	
	with open('nino34_1979_2010.dat') as file:
		enso = [[float(digit) for digit in line.split()] for line in file]
	
	enso = np.array(enso)
	enso = enso[:,1:]
	enso = enso[:,s:e]
	enso = np.mean(enso, axis=1)
	
	with open('iod_1979_2010.dat') as file:
		iod = [[float(digit) for digit in line.split()] for line in file]
	
	iod = np.array(iod)
	iod = iod[:,2]
	iod = iod.reshape(31,12)
	iod = iod[:,s:e]
	iod = np.mean(iod, axis=1)
	print("IOD:",iod.size)
	print("ENSO:",enso.size)
	print("SAM:",sam.size)
	
	obfile = '/Users/ailieg/Data/drought_model_eval_data/data/obs/GPCP/precip.mon.mean.nc'
	obsnc = ncextractall(obfile)
	obsp = obsnc['precip']
	lon = obsnc['lon']
	nlon = lon.size
	lat = obsnc['lat']
	nlat = lat.size
	time = obsnc['time']
	obsmiss = obsnc['precip_missing_value']
	obsp[np.where(obsp == obsmiss)] = np.nan
	
	#convert time units to actual date
	time_u = obsnc['time_units']
	if 'time_calendar' in obsnc.keys(): 
	     cal = obsnc['time_calendar']
	     time = num2date(time,units = time_u, calendar=cal)
	else: time = num2date(time,units = time_u)
	
	#check that the data array begins in January
	i = 0
	smon = time[i].month
	while (smon != 1):
	    i = i + 1
	    smon = time[i].month 
	    
	#clip the array at the start   
	obsp = obsp[i:,:,:]
	time = time[i:]
	
	#check that the data array ends in December 
	i = len(time) - 1
	emon = time[i].month
	while (emon != 12):
	    i = i - 1
	    emon = time[i].month
	#clip the array at the end    
	obsp = obsp[:i+1,:,:] #remember that Python does not include the final value, so it has to be +1
	time = time[:i+1]
	ntime = time.size
	
	corr = np.zeros((nlat,nlon))
	
	
	for i in range(0,nlon):
		for j in range(0,nlat):
		
			series = obsp[:,j,i]
			lens = series.size
			lens = lens/12
			lens = int(lens)
			series = series.reshape([lens,12])
			srs = series[0:31,s:e]
			series = np.mean(srs, axis=1)
			corriod = stats.pearsonr(iod, series)[0]
			correnso = stats.pearsonr(enso, series)[0]
			corrsam = stats.pearsonr(sam, series)[0]
			
			corr[j,i] = ((corriod**2)+(correnso**2)+(corrsam**2))*100
	
	ax = plt.axes(projection=ccrs.PlateCarree())
	
	lev = np.arange(0.,105,5) #the combined r-squared percentage is non-negative
	pc = plt.contourf(lon, lat, corr, levels=lev, extend='max', transform=ccrs.PlateCarree())
	cb = plt.colorbar(pc)

	ax.coastlines()
	plt.show()
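The quantity mapped above is the summed squared correlation against the three indices, expressed in percent; isolated as a helper for clarity:

import numpy as np
from scipy import stats

def combined_r2_percent(series, indices):
    #Sum of squared Pearson correlations of `series` against each climate
    #index, times 100, exactly as computed inside the double loop above.
    return 100.0 * sum(stats.pearsonr(idx, series)[0] ** 2 for idx in indices)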
Example #8
def plot_globrainvar():
	from matplotlib import pyplot as plt
	import cartopy.crs as ccrs
	from netcdf_tools import ncextractall
	import numpy as np
	from scipy import stats
	from netCDF4 import num2date

	
	obfile = '/Users/ailieg/Data/drought_model_eval_data/data/obs/GPCP/precip.mon.mean.nc'
	obsnc = ncextractall(obfile)
	obsp = obsnc['precip']
	lon = obsnc['lon']
	nlon = lon.size
	lat = obsnc['lat']
	nlat = lat.size
	time = obsnc['time']
	obsmiss = obsnc['precip_missing_value']
	obsp[np.where(obsp == obsmiss)] = np.nan
	
	#convert time units to actual date
	time_u = obsnc['time_units']
	if 'time_calendar' in obsnc.keys(): 
	     cal = obsnc['time_calendar']
	     time = num2date(time,units = time_u, calendar=cal)
	else: time = num2date(time,units = time_u)
	
	#check that the data array begins in January
	i = 0
	smon = time[i].month
	while (smon != 1):
	    i = i + 1
	    smon = time[i].month 
	    
	#clip the array at the start   
	obsp = obsp[i:,:,:]
	time = time[i:]
	
	#check that the data array ends in December 
	i = len(time) - 1
	emon = time[i].month
	while (emon != 12):
	    i = i - 1
	    emon = time[i].month
	#clip the array at the end    
	obsp = obsp[:i+1,:,:] #remember that Python does not include the final value, so it has to be +1
	time = time[:i+1]
	ntime = time.size
	
	var = np.zeros((nlat,nlon))
	
	
	for i in range(0,nlon):
		for j in range(0,nlat):
		
			series = obsp[:,j,i]
			lens = series.size
			lens = lens/12
			lens = int(lens)
			series = series.reshape([lens,12])
			series = np.sum(series,axis=1)
			
			sm = np.mean(series)
			sd = np.std(series)
			
			frac = (sd/sm)*100
			frac = np.where(frac > 99.9,99.9,frac)
			
			var[j,i] = frac
			

	ax = plt.axes(projection=ccrs.PlateCarree())
	
	lev = np.arange(0.,105,5) #the variability fraction is non-negative
	pc = plt.contourf(lon, lat, var, levels=lev, transform=ccrs.PlateCarree())
	cb = plt.colorbar(pc)

	ax.coastlines()
	plt.show()
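The plotted field is the coefficient of variation of annual rainfall totals in percent, capped at 99.9; the per-cell computation as a standalone function:

import numpy as np

def annual_cv_percent(monthly):
    #Coefficient of variation of annual totals (percent), capped at 99.9
    #as in the loop above; `monthly` is a 1-D series of whole years.
    annual = monthly.reshape(-1, 12).sum(axis=1)
    return min(100.0 * annual.std() / annual.mean(), 99.9)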
def gpcp_corr(sone, stwo):
    from netcdf_tools import ncextractall
    import numpy as np
    from scipy import stats
    from netCDF4 import num2date

    obfile = '/Users/ailieg/Data/drought_model_eval_data/data/obs/GPCP/precip.mon.mean.nc'
    obsnc = ncextractall(obfile)
    obsp = obsnc['precip']
    lon = obsnc['lon']
    nlon = lon.size
    lat = obsnc['lat']
    nlat = lat.size
    time = obsnc['time']
    obsmiss = obsnc['precip_missing_value']
    obsp[np.where(obsp == obsmiss)] = np.nan

    #convert time units to actual date
    time_u = obsnc['time_units']
    if 'time_calendar' in obsnc.keys():
        cal = obsnc['time_calendar']
        time = num2date(time, units=time_u, calendar=cal)
    else:
        time = num2date(time, units=time_u)

    #check that the data array begins in January
    i = 0
    smon = time[i].month
    while (smon != 1):
        i = i + 1
        smon = time[i].month

    #clip the array at the start
    obsp = obsp[i:, :, :]
    time = time[i:]

    #check that the data array ends in December
    i = len(time) - 1
    emon = time[i].month
    while (emon != 12):
        i = i - 1
        emon = time[i].month
    #clip the array at the end
    obsp = obsp[:i + 1, :, :]  #+1 because Python slicing excludes the end index
    time = time[:i + 1]
    ntime = time.size

    corr = np.zeros((nlat, nlon)) + obsmiss

    for i in range(0, nlon):
        for j in range(0, nlat):

            series = obsp[:, j, i]
            lens = series.size
            lens = lens / 12
            lens = int(lens)
            series = series.reshape([lens, 12])

            if sone[0] > sone[sone.size - 1]:
                sidxone = np.arange(0, sone.size)
                series = np.roll(series, -1 * (sone[0] - 1))
                seriesone = np.mean(series[:, sidxone], axis=1)
            else:
                sidxone = sone - 1
                seriesone = np.mean(series[:, sidxone], axis=1)

            if stwo[0] > stwo[stwo.size - 1]:
                sidxtwo = np.arange(0, stwo.size)
                series = np.roll(series, -1 * (stwo[0] - 1))
                seriestwo = np.mean(series[:, sidxtwo], axis=1)
            else:
                sidxtwo = stwo - 1
                seriestwo = np.mean(series[:, sidxtwo], axis=1)

            corr[j, i] = stats.pearsonr(seriesone, seriestwo)[0]

    return corr, lat, lon
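A hypothetical call (the data path inside the function is assumed valid) correlating June-August rainfall with December-February rainfall; a season whose first month number exceeds its last is treated as wrapping the year end:

import numpy as np

corr, lat, lon = gpcp_corr(np.array([6, 7, 8]), np.array([12, 1, 2]))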
def synth_climseries(file, vname, nsyn):

    import numpy as np
    import scipy.stats as stats
    from netcdf_tools import ncextractall
    from netCDF4 import num2date

    #Import and extract the netcdf file, returns a dictionary
    datdict = ncextractall(file)

    #Read each variable
    data = datdict[vname]
    lon = datdict['lon']
    lat = datdict['lat']
    time = datdict['time']

    #convert time units to actual date
    time_u = datdict['time_units']
    if 'time_calendar' in datdict.keys():
        cal = datdict['time_calendar']
        time = num2date(time, units=time_u, calendar=cal)
    else:
        time = num2date(time, units=time_u)

    #convert missing numbers to NaN
    miss = datdict[vname + "_missing_value"]
    data = np.where(data == miss, np.nan, data)

    #check that the data array begins in January
    i = 0
    smon = time[i].month
    while (smon != 1):
        i = i + 1
        smon = time[i].month

    #clip the array at the start
    data = data[i:, :, :]
    time = time[i:]

    #check that the data array ends in December
    i = len(time) - 1
    emon = time[i].month
    while (emon != 12):
        i = i - 1
        emon = time[i].month
    #clip the array at the end
    data = data[:i + 1, :, :]  #+1 because Python slicing excludes the end index
    time = time[:i + 1]

    #Create an empty array that will contain the synthetic series
    synthseries = np.zeros((lat.size, lon.size, time.size, nsyn))
    synthtest = np.zeros((lat.size, lon.size, time.size, nsyn))
    des = np.zeros((time.size, lat.size, lon.size))
    ac = np.zeros((lat.size, lon.size))

    for i in range(0, len(lat)):
        print("Lat ", i, "of ", len(lat))
        for j in range(0, len(lon)):

            series = data[:, i, j]
            series = series.flatten()

            #The data must be stratified into months so that the SPI may be computed
            #for each month so data is deseasonalised.
            #Reshape the array to stratify into months.
            lens = len(series)
            lens = lens / 12
            lens = int(lens)
            series = series.reshape([lens, 12])

            #compute auto-correlation of the deseasonalised time series (to remove auto-
            #correlation associated with seasonality, which will automatically be removed
            #when gamma distributions are computed seasonally)
            sm = series.mean(axis=0)
            seriessm = np.tile(sm, lens)  #repeat the monthly climatology over every year
            deseas = (series.flatten()) - seriessm
            des[:, i, j] = deseas
            lag = np.roll(deseas, -1)
            ac[i, j] = stats.pearsonr(deseas, lag)[0]

            #Compute NSYN number of synthetic series

            for m in range(0, nsyn):

                tmpsyn = np.zeros((lens, 12))

                #Compute the parameters for the gamma distribution, one month at a time
                for k in range(0, 12):

                    tmp = series[:, k]
                    tmpsave = tmp

                    #remove any NaNs (i.e. missing numbers) from data so only real numbers exist
                    tmp = tmp[~np.isnan(tmp)]

                    if len(tmp) > 10:
                        #compute the number of zeros
                        numzeros = (tmp[np.where(tmp == 0.0)]).size

                        #compute the probability of zeros based on the sample series
                        q = numzeros / tmp.size
                        #compute the probability of non-zeros based on the sample series
                        p = 1.0 - q

                        #compute the shape, scale and location parameters based on non-zero data only
                        nonzerotmp = tmp[np.where(tmp > 0.0)]
                        numnonzero = nonzerotmp.size
                        A = np.log(np.mean(nonzerotmp)) - (
                            np.sum(np.log(nonzerotmp)) / len(nonzerotmp))
                        shp = (1.0 / (4 * A)) * (1 + ((1 +
                                                       ((4 * A) / 3))**0.5))
                        scl = np.mean(nonzerotmp) / shp


                        #Compute synthetic distribution of non-zero values
                        synthgam = stats.gamma.rvs(shp,
                                                   scale=scl,
                                                   size=numnonzero)

                        if q > 0.0:
                            zeroarr = np.zeros(numzeros)
                            synthgam = np.concatenate((zeroarr, synthgam))
                            np.random.shuffle(synthgam)

                        tmpsyn[:, k] = synthgam

                    else:
                        tmpsyn[:, k] = miss  #too few valid values: flag this month as missing

                tmps = tmpsyn.flatten()
                synthseries[i, j, :, m] = tmps + ((ac[i, j]) * tmps)
                synthtest[i, j, :, m] = tmps

    return synthseries, synthtest, data, des, lon, lat, time, time_u, miss, ac
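The shape and scale estimates inside the loop use a closed-form approximation to the gamma maximum-likelihood fit (commonly attributed to Thom) that also appears in SPI implementations; as a standalone helper:

import numpy as np

def gamma_fit_approx(x):
    #Closed-form approximate ML fit for positive samples, as used above:
    #A = ln(mean) - mean(ln), shape = (1 + sqrt(1 + 4A/3)) / (4A).
    A = np.log(np.mean(x)) - np.mean(np.log(x))
    shape = (1.0 / (4 * A)) * (1 + np.sqrt(1 + (4 * A) / 3))
    return shape, np.mean(x) / shape  #(shape, scale)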
def synth_test(file, vname, nsyn):

    import numpy as np
    import scipy.stats as stats
    from netcdf_tools import ncextractall
    from netCDF4 import num2date

    #Import and extract the netcdf file, returns a dictionary
    datdict = ncextractall(file)

    #Read each variable
    data = datdict[vname]
    lon = datdict['lon']
    lat = datdict['lat']
    time = datdict['time']

    #convert time units to actual date
    time_u = datdict['time_units']
    if 'time_calendar' in datdict.keys():
        cal = datdict['time_calendar']
        time = num2date(time, units=time_u, calendar=cal)
    else:
        time = num2date(time, units=time_u)

    #convert missing numbers to NaN
    miss = datdict[vname + "_missing_value"]
    data = np.where(data == miss, np.nan, data)

    #check that the data array begins in January
    i = 0
    smon = time[i].month
    while (smon != 1):
        i = i + 1
        smon = time[i].month

    #clip the array at the start
    data = data[i:, :, :]
    time = time[i:]

    #check that the data array ends in December
    i = len(time) - 1
    emon = time[i].month
    while (emon != 12):
        i = i - 1
        emon = time[i].month
    #clip the array at the end
    data = data[:i + 1, :, :]  #+1 because Python slicing excludes the end index
    time = time[:i + 1]

    #Create an empty array that will contain the synthetic series
    synthseries = np.zeros((lat.size, lon.size, time.size, nsyn))
    des = np.zeros((time.size, lat.size, lon.size))

    for i in range(0, len(lat)):
        print("Lat ", i, "of ", len(lat))
        for j in range(0, len(lon)):

            series = data[:, i, j]
            series = series.flatten()

            #The data must be stratified into months so that the SPI may be computed
            #for each month so data is deseasonalised.
            #Reshape the array to stratify into months.
            lens = len(series)
            lens = lens / 12
            lens = int(lens)
            series = series.reshape([lens, 12])

            #compute auto-correlation of the deseasonalised time series (to remove auto-
            #correlation associated with seasonality, which will automatically be removed
            #when gamma distributions are computed seasonally)
            sm = series.mean(axis=0)
            seriessm = np.tile(sm, lens)
            deseas = (series.flatten()) - seriessm
            des[:, i, j] = deseas
            ac = np.zeros(6)
            for l in range(1, 7):
                ac[l - 1] = stats.pearsonr(deseas, np.roll(deseas,
                                                           (-1 * l)))[0]
            #reverse the autocorrelation function for use later
            ac = ac[::-1]

            tmpsyn = np.zeros((lens, 12))
            shp = np.zeros(12)
            scl = np.zeros(12)

            #make an array of 1s for later - used to store zeros if necessary
            zeros = np.zeros((lens, 12)) + 1.0

            #Compute the parameters for the gamma distribution, one month at a time
            for k in range(0, 12):

                tmp = series[:, k]
                tmpsave = tmp

                #remove any NaNs (i.e. missing numbers) from data so only real numbers exist
                tmp = tmp[~np.isnan(tmp)]

                if len(tmp) > 10:
                    #compute the number of zeros
                    numzeros = (tmp[np.where(tmp == 0.0)]).size

                    #compute the probability of zeros based on the sample series
                    q = numzeros / tmp.size
                    #compute the probability of non-zeros based on the sample series
                    p = 1.0 - q

                    #compute the shape, scale and location parameters based on non-zero data only
                    nonzerotmp = tmp[np.where(tmp > 0.0)]
                    numnonzero = nonzerotmp.size
                    A = np.log(np.mean(nonzerotmp)) - (
                        np.sum(np.log(nonzerotmp)) / len(nonzerotmp))
                    shp[k] = (1.0 / (4 * A)) * (1 + ((1 + ((4 * A) / 3))**0.5))
                    scl[k] = np.mean(nonzerotmp) / shp[k]

                    tmpz = np.zeros(lens) + 1.0
                    tmpz[0:numzeros] = 0.0
                    np.random.shuffle(tmpz)

                    zeros[:, k] = tmpz

                else:
                    tmps = np.zeros(len(tmpsave)) + miss

            #Reshape shape and scale
            alpha = np.tile(shp, lens)
            beta = np.tile(scl, lens)
            zeros = zeros.flatten()

            #Compute NSYN number of synthetic (surrogate) series
            for m in range(0, nsyn):
                #noise = ((alpha[0]*beta[0]) + (alpha[0]*(beta[0]**2))*(stats.norm.rvs()))
                noise = stats.gamma.rvs(alpha[0], scale=beta[0], size=6)
                surgam = np.zeros((time.size))
                surgam[0:6] = noise

                #Compute the surrogate using the form x(t) = noise + sum(ac * x(t-7:t-2)),
                #setting time steps flagged as zero-rainfall to zero
                for w in range(7, time.size):
                    if zeros[w] < 1.0:
                        surgam[w] = 0.0
                    else:
                        noise = stats.gamma.rvs(alpha[w], scale=beta[w])
                        surgam[w] = noise + (np.sum(ac * surgam[w - 7:w - 1]))

                synthseries[i, j, :, m] = surgam

    return synthseries, data, alpha, beta, zeros, ac, surgam
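A hypothetical invocation, with a placeholder path and variable name; nsyn controls how many surrogate series are generated per grid cell:

#placeholder path and variable name for illustration only
synthseries, data, alpha, beta, zeros, ac, surgam = synth_test('/path/to/precip.mon.mean.nc', 'precip', 100)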
def plot_mmm_bootstrap(pc, wl, s):
	import numpy as np
	from netcdf_tools import ncextractall
	from matplotlib import pyplot as plt
	import cartopy.crs as ccrs
	from matplotlib.colors import BoundaryNorm
	from matplotlib.ticker import MaxNLocator
	from grid_tools import fold_grid
	
	ofile = '/Users/ailieg/Data/drought_model_eval_data/data/obs/GPCP/precip.mon.mean.nc'
	
	cmiptitle = ['ACCESS1-0_historical_r1i1p1',\
				'CanESM2_historical_r1i1p1',\
				'GFDL-CM3_historical_r1i1p1',\
				'HadGEM2-CC_historical_r1i1p1',\
				'MPI-ESM-P_historical_r1i1p1',\
				'CCSM4_historical_r1i1p1',\
				'FGOALS-s2_historical_r1i1p1',\
				'GISS-E2-R_historical_r6i1p1',\
				'NorESM1-M_historical_r1i1p1',\
				'IPSL-CM5B-LR_historical_r1i1p1']
				
	amiptitle = ['FGOALS-s2_amip_r1i1p1',\
				'GFDL-CM3_amip_r1i1p1',\
				'HadGEM2-A_amip_r1i1p1',\
				'NorESM1-M_amip_r1i1p1']
				
	cmipfile = ['CMIP5/ACCESS1-0/r1i1p1/pr/pr_Amon_ACCESS1-0_historical_r1i1p1_185001-200512.nc',\
				'CMIP5/CanESM2/r1i1p1/pr/pr_Amon_CanESM2_historical_r1i1p1_185001-200512.nc',\
				'CMIP5/GFDL-CM3/r1i1p1/pr/pr_Amon_GFDL-CM3_historical_r1i1p1_186001-200512.nc',\
				'CMIP5/HadGEM2-CC/r1i1p1/pr/pr_Amon_HadGEM2-CC_historical_r1i1p1_185912-200511.nc',\
				'CMIP5/MPI-ESM-P/r1i1p1/pr/pr_Amon_MPI-ESM-P_historical_r1i1p1_185001-200512.nc',\
				'CMIP5/CCSM4/r1i1p1/pr/pr_Amon_CCSM4_historical_r1i1p1_185001-200512.nc',\
				'CMIP5/FGOALS-s2/r1i1p1/pr/pr_Amon_FGOALS-s2_historical_r1i1p1_185001-200512.nc',\
				'CMIP5/GISS-E2-R/r6i1p1/pr/pr_Amon_GISS-E2-R_historical_r6i1p1_185001-200512.nc',\
				'CMIP5/NorESM1-M/r1i1p1/pr/pr_Amon_NorESM1-M_historical_r1i1p1_185001-200512.nc',\
				'CMIP5/IPSL-CM5B-LR/r1i1p1/pr/pr_Amon_IPSL-CM5B-LR_historical_r1i1p1_185001-200512.nc']
				
	amipfile = ['AMIP/FGOALS-s2/r1i1p1/pr/pr_Amon_FGOALS-s2_amip_r1i1p1_197901-200812.nc',\
				'AMIP/GFDL-CM3/r1i1p1/pr/pr_Amon_GFDL-CM3_amip_r1i1p1_197901_200812.nc',\
				'AMIP/HadGEM2-A/r1i1p1/pr/pr_Amon_HadGEM2-A_amip_r1i1p1_197809-200811.nc',\
				'AMIP/NorESM1-M/r1i1p1/pr/pr_Amon_NorESM1-M_amip_r1i1p1_197901-200512.nc']
	
	modfile = cmipfile
	intitle = cmiptitle
	
	modpath = '/Users/ailieg/Data/drought_model_eval_data/data/'
	
	
	obsnc = ncextractall(ofile)
	lon = obsnc['lon']
	lat = obsnc['lat']
	
	sigmod = np.zeros((len(modfile), lat.size, lon.size))
	
	for i in range(0,len(modfile)):
		
		mfile = modpath+modfile[i]
		it = intitle[i]

		d, sig, lat, lon = perc_compare_bsoblen(pc, wl, s, ofile, mfile, it) #assumed to be defined elsewhere in this module
		
		sig[sig < 0.0] = 0.0
		sigmod[i,:,:] = sig
		
	sumsig = np.sum(sigmod, axis=0)/len(modfile)
	
	#Set levels and colormap
	levels = np.arange(0,100,10)
	cmap = plt.get_cmap('Spectral')
	norm = BoundaryNorm(levels, ncolors=cmap.N, clip=True)
	
	#Make the grid circular
	sumsig, lat, lon = fold_grid(sumsig, lat, lon)
	
	#Set axes and plot
	ax = plt.axes(projection=ccrs.PlateCarree())
	p=plt.pcolormesh(lon, lat, sumsig, cmap=cmap, norm=norm)
	
	#Add a colorbar
	cbar = plt.colorbar(p, extend='both')
	cbar.ax.set_ylabel('%')
	ax.coastlines()
	
	#Create title for saved plot
	#month names repeated so windows spanning the year end can be indexed
	seasname = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec',\
	'Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec',\
	'Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
	title = 'PERC_MODELS_CMIP_'+str(wl)+'mth_'+seasname[s-1]+seasname[s+wl-2]+'_'+str(pc)+'th%ile'
	plt.title(title, fontsize=10)
	
	#Save the output
	savefile = title+'.png'
	outfile = '/Users/ailieg/Data/drought_model_eval_data/analysis/'+savefile
	plt.savefig(outfile, dpi=400, format='png',bbox_inches='tight')
	plt.close()
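A hypothetical invocation for the 10th percentile over a 3-month window starting in June (perc_compare_bsoblen and the data paths inside the function are assumed to be in place):

plot_mmm_bootstrap(10, 3, 6)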