def calc_HadISST_residual_EOFs(histo_sy, histo_ey, run_n):
    # load the already calculated residuals
    resid_fname = get_HadISST_residuals_fname(histo_sy, histo_ey, run_n)
    # open netcdf_file
    fh = netcdf_file(resid_fname, 'r')
    lats_var = fh.variables["latitude"]
    lons_var = fh.variables["longitude"]
    attrs = fh.variables["sst"]._attributes
    mv = attrs["_FillValue"]
    var = fh.variables["sst"]
    sst_data = numpy.ma.masked_equal(var[:], mv)

    # calculate the EOFs and PCs
    # take the eofs
    coslat = numpy.cos(numpy.deg2rad(lats_var[:])).clip(0., 1.)
    wgts = numpy.sqrt(coslat)[..., numpy.newaxis]
    eof_solver = Eof(sst_data, center=False, weights=wgts)
    pcs = eof_solver.pcs(npcs=None)
    eofs = eof_solver.eofs(neofs=None)

    # get the output names
    out_eofs_fname = get_HadISST_residual_EOFs_fname(histo_sy, histo_ey, run_n)
    out_pcs_fname  = get_HadISST_residual_PCs_fname(histo_sy, histo_ey, run_n)
    
    # save the eofs and pcs
    save_3d_file(out_eofs_fname, eofs, attrs, lats_var, lons_var)
    save_pcs(out_pcs_fname, pcs, attrs)
    fh.close()
Ejemplo n.º 2
0
def smooth(variable, window) :
    from axis import Axes, TimeAxis
    from variable import Variable
    if len(variable.shape) > 1 :
        raise NotImplementedError    
    try :
        variable.dts
    except :
        raise NotImplementedError    
    if window%2 == 0 :
        raise NotImplementedError    
    mask = np.ones(window)
    #mask[int(window/2)] = 1    
    mask /= window*1.0
    newAxes = Axes()
    newAxes['time'] = TimeAxis(variable.dts[int(window/2):-int(window/2)])
    return Variable(
            data = np.convolve(variable.data, mask, mode='valid'),
            axes = newAxes,
            metadata = variable.metadata)
    from eofs.standard import Eof
    wgts = np.cos(variable.lats*np.pi/180)**0.5
    solver = Eof(variable.data, weights = wgts[:, None])
    eof1 = solver.eofs(eofscaling=2, neofs=1)
    print solver.varianceFraction(neigs=1)[0]*100, '%'
    output = variable[0].empty()
    output.data = eof1[0]
    return output
Ejemplo n.º 3
0
def eof(variable) :
    from eofs.standard import Eof
    wgts = np.cos(variable.lats*np.pi/180)**0.5
    solver = Eof(variable.data, weights = wgts[:, None])
    eof1 = solver.eofs(eofscaling=2, neofs=1)
    print solver.varianceFraction(neigs=1)[0]*100, '%'
    output = variable[0].empty()
    output.data = eof1[0]
    return output
Ejemplo n.º 4
0
 def eof(self,no_of_eofs=1):
     data_mean = self._data.mean(axis=0)
     coslat = np.cos(np.deg2rad(self._lat)).clip(0., 1.) 	#weighting
     wgts = np.sqrt(coslat)[..., np.newaxis]
     solver = Eof(self._data, weights=wgts)
     # Retrieve the leading EOF, expressed as the covariance between the leading PC
     # time series and the input SLP anomalies at each grid point.
     eof = solver.eofsAsCorrelation(neofs=no_of_eofs)
     fraction=solver.varianceFraction(no_of_eofs)
     return Eof_pattern(eof, fraction,self._lat,self._lon)
def create_monthly_intvar(run_type, ref_start, ref_end, n_pcs=22, run_n=400):
    # load in the PCs and EOFs
    histo_sy = 1899
    histo_ey = 2010
    
#    monthly_pc_fname = get_HadISST_monthly_residual_PCs_fname(histo_sy, histo_ey, run_n)
#    monthly_pcs = load_data(monthly_pc_fname)
#    monthly_eof_fname = get_HadISST_monthly_residual_EOFs_fname(histo_sy, histo_ey, run_n)
#    monthly_eofs = load_sst_data(monthly_eof_fname, "sst")

    monthly_residuals_fname = get_HadISST_monthly_residuals_fname(histo_sy, histo_ey, run_n)
    # open netcdf_file
    fh = netcdf_file(monthly_residuals_fname, 'r')
    attrs = fh.variables["sst"]._attributes
    mv = attrs["_FillValue"]
    var = fh.variables["sst"]
    monthly_residuals = numpy.ma.masked_equal(var[:], mv)

    # weights for reconstruction / projection
    coslat = numpy.cos(numpy.deg2rad(numpy.arange(89.5, -90.5, -1)).clip(0., 1.))
    wgts = numpy.sqrt(coslat)[..., numpy.newaxis]
    eof_solver = Eof(monthly_residuals, center=False, weights=wgts)
    monthly_pcs = eof_solver.pcs(npcs=n_pcs)
    monthly_eofs = eof_solver.eofs(neofs=n_pcs)
    
    # get the explanation of variance and calculate the scalar from it
    M = 1.0 / numpy.sum(eof_solver.varianceFraction(neigs=n_pcs))
    
    # get the number of months to predict the PCs for and create the storage
    histo_sy, histo_ey, rcp_sy, rcp_ey = get_start_end_periods()
    n_mnths = 12*(rcp_ey - histo_sy)
    predicted_pcs = numpy.zeros([n_mnths+12, n_pcs], "f")

    # fit an AR process to the first ~20 pcs
    for pc in range(0, n_pcs):
        # create the model
        arn = ARN(monthly_pcs[:,pc].squeeze())
        # fit the model to the data
        res = arn.fit()
        arp = res.k_ar
        # create a timeseries of predicted values
        predicted_pcs[:,pc] = M*arn.predict(res.params, noise='all', dynamic=True, start=arp, end=n_mnths+arp+11)

    # reconstruct the field and return
    # reconstruct the field
    monthly_intvar = reconstruct_field(predicted_pcs, monthly_eofs[:n_pcs], n_pcs, wgts)
    return monthly_intvar
def calc_HadISST_monthly_residual_EOFs(histo_sy, histo_ey, ref_start, ref_end, run_n, n_eofs=22):
    # load the already calculated residuals
    resid_fname = get_HadISST_monthly_residuals_fname(histo_sy, histo_ey, run_n)
    # note that we don't have to subtract the annual cycle any more as the
    # residuals are with respect to a smoothed version of the monthly ssts
    
    resid_mon_fh = netcdf_file(resid_fname, 'r')
    sst_var = resid_mon_fh.variables["sst"]
    lats_var = resid_mon_fh.variables["latitude"]
    lons_var = resid_mon_fh.variables["longitude"]
    attrs = sst_var._attributes
    mv = attrs["_FillValue"]
    ssts = numpy.array(sst_var[:])
    sst_resids = numpy.ma.masked_less(ssts, -1000)
    
    # calculate the EOFs and PCs
    # take the eofs
    coslat = numpy.cos(numpy.deg2rad(lats_var[:])).clip(0., 1.)
    wgts = numpy.sqrt(coslat)[..., numpy.newaxis]
    eof_solver = Eof(sst_resids, center=True, weights=wgts)
    pcs = eof_solver.pcs(npcs=n_eofs)
    eofs = eof_solver.eofs(neofs=n_eofs)
    varfrac = eof_solver.varianceFraction(neigs=n_eofs)
    evs = eof_solver.eigenvalues(neigs=n_eofs)
    evs = evs.reshape([1,evs.shape[0]])
    print evs.shape
    
    # get the output names
    out_eofs_fname = get_HadISST_monthly_residual_EOFs_fname(histo_sy, histo_ey, run_n)
    out_pcs_fname  = get_HadISST_monthly_residual_PCs_fname(histo_sy, histo_ey, run_n)
    out_evs_fname  = get_HadISST_monthly_residual_EVs_fname(histo_sy, histo_ey, run_n)

    # save the eofs and pcs
    save_3d_file(out_eofs_fname, eofs, attrs, lats_var, lons_var)
    out_pcs = pcs.reshape([pcs.shape[0],1,pcs.shape[1]])
    save_pcs(out_pcs_fname, out_pcs, attrs)
    save_eigenvalues(out_evs_fname, evs, attrs)
    resid_mon_fh.close()
Ejemplo n.º 7
0
# Read SST anomalies using the netCDF4 module. The file contains
# November-March averages of SST anomaly in the central and northern Pacific.
filename = example_data_path('sst_ndjfm_anom.nc')
ncin = Dataset(filename, 'r')
sst = ncin.variables['sst'][:]
lons = ncin.variables['longitude'][:]
lats = ncin.variables['latitude'][:]
ncin.close()

# Create an EOF solver to do the EOF analysis. Square-root of cosine of
# latitude weights are applied before the computation of EOFs.
coslat = np.cos(np.deg2rad(lats))
wgts = np.sqrt(coslat)[..., np.newaxis]
solver = Eof(sst, weights=wgts)

# Retrieve the leading EOF, expressed as the correlation between the leading
# PC time series and the input SST anomalies at each grid point, and the
# leading PC time series itself.
eof1 = solver.eofsAsCorrelation(neofs=1)
pc1 = solver.pcs(npcs=1, pcscaling=1)

# Plot the leading EOF expressed as correlation in the Pacific domain.
clevs = np.linspace(-1, 1, 11)
ax = plt.axes(projection=ccrs.PlateCarree(central_longitude=190))
fill = ax.contourf(lons, lats, eof1.squeeze(), clevs,
                   transform=ccrs.PlateCarree(), cmap=plt.cm.RdBu_r)
ax.add_feature(cfeature.LAND, facecolor='w', edgecolor='k')
cb = plt.colorbar(fill, orientation='horizontal')
cb.set_label('correlation coefficient', fontsize=12)
Ejemplo n.º 8
0
def main(mFilepath, xFilepath, yFilepath, window, windowFlag=True):
    ## load in the data matrix as a numpy array
    m = np.loadtxt(mFilepath, dtype='float', delimiter=',', skiprows=1)
    # lon = np.loadtxt(xFilepath, dtype='float', delimiter=',', skiprows=1)
    # lat = np.loadtxt(yFilepath, dtype='float', delimiter=',', skiprows=1)
    # time =  np.arange('1958-01-01', '2014-09-22', dtype='datetime64')
    # years = range(1958, 2014)
    ## Create a list of dates spanning the study period
    base = dt.datetime(2014, 9, 21, 1, 1, 1, 1)
    dates = [base - dt.timedelta(days=x) for x in range(0, 20718)]
    date_list = [item for item in reversed(dates)]


    ## attempted to read in the raw data, but was struggling with
    ## the array dimensions
    # ncFiles = os.listdir(workspace)
    # slpList, lonList, latList, timeList = [], [], [], []
    # for fileIn in ncFiles:
    #     ncIn = Dataset(os.path.join(workspace, fileIn), 'r')
    #     slpList.append(ncIn.variables['slp'][:]/100)
    #     lonList.append(ncIn.variables['lon'][:])
    #     latList.append(ncIn.variables['lat'][:])
    #     timeList.append(ncIn.variables['time'][:])
    #     ncIn.close()

    # slp = np.array(slpList)
    # print(slp)
    # print(slp.shape)
    # # print(slp)
    # # print(np.shape(slp))

    ## create an EOF solver object and extrac the first
    ## 4 EOFs and their associated PCs. Scaling can be 
    ## applied if desired
    ## http://ajdawson.github.io/eofs/api/eofs.standard.html#eofs.standard.Eof
    solver = Eof(m)
    eofs = solver.eofs(neofs=4, eofscaling=0)
    pcs = solver.pcs(npcs=4, pcscaling=0)

    # lon, lat = np.meshgrid(lon, lat)

    ## plot the EOFs as nongeographic data for simplicity
    fig = plt.figure(figsize=(10, 10))
    for i in range(4):
        ax = fig.add_subplot(2, 2, i+1)
        lab = 'EOF' + str(i + 1)
        main =  'Unscaled ' + lab

        eofPlot = eofs[i,].reshape(17, 32)

        plt.imshow(eofPlot, cmap=plt.cm.RdBu_r)
        plt.title(main)
        cb = plt.colorbar(orientation='horizontal', cmap=plt.cm.RdBu_r)
        cb.set_label(lab, fontsize=12)

        ## Basemap failure below.  Something with the y cell size went wrong
        # bm = Basemap(projection='cyl', llcrnrlat=16.17951, urcrnrlat=68.48459,
        #              llcrnrlon=-176.0393, urcrnrlon=-98.07901, resolution='c')

        # # bm.contourf(x, y, eof1.squeeze(), clevs, cmap=plt.cm.RdBu_r)
        # bm.drawcoastlines()
        # bm.drawstates()
        # im = bm.pcolormesh(lon, lat, eofPlot, cmap=plt.cm.RdBu_r, latlon=True)
        # # bm.fillcontinents(color='coral', lake_color='aqua')
        # bm.drawparallels(np.arange(-90.,91.,15.))
        # bm.drawmeridians(np.arange(-180.,181.,30.))
        # # bm.drawmapboundary(fill_color='aqua')
        # cb = plt.colorbar(orientation='horizontal')
        # cb.set_label(lab, fontsize=12)
        # plt.title(main, fontsize=16)
        # plt.show()
    plt.show()
    ## Plot the PCs as a time series
    fig = plt.figure(figsize=(16, 16))
    for i in range(4):
        ylab = 'PC' + str(i+1)
        title = ylab + ' Time Series'

        pcPlot = pcs[:,i]
        if i==0:
            theAx = fig.add_subplot(4, 1, i+1)
            plt.setp(theAx.get_xticklabels(), visible=False)
            theAx.set_xlabel('')
        if i>0 and i<3:
            ax = fig.add_subplot(4, 1, i+1, sharex=theAx)
            plt.setp(ax.get_xticklabels(), visible=False)
        if i==3:
            ax = fig.add_subplot(4, 1, i+1, sharex=theAx)
            plt.xlabel('Date')

        plt.plot(date_list, pcPlot, color='b')
        if windowFlag:
            plt.plot(date_list, movingaverage(pcPlot, window), 
                     color='r', linestyle='-')
        plt.axhline(0, color='k')
        plt.title(title)
        plt.ylabel(ylab)
    plt.show()

    ## Subset the dates to the last year of the dataset
    short_date = [item for item in date_list if 
                  item >= dt.datetime(2013, 6, 17) 
                  and item < dt.datetime(2014, 6, 25)]
    indices = [date_list.index(item) for item in short_date]

    fig = plt.figure(figsize=(16, 16))
    ## Plot out the last year of the PCs to get a more detailed
    ## pattern for comparison to the R results
    for i in range(4):
        ylab = 'PC' + str(i+1)
        title = ylab + ' Time Series (1 year)'

        pcPlot = pcs[np.array(indices),i]
        if i==0:
            theAx = fig.add_subplot(4, 1, i+1)
            plt.setp(theAx.get_xticklabels(), visible=False)
            theAx.set_xlabel('')
        if i>0 and i<3:
            ax = fig.add_subplot(4, 1, i+1, sharex=theAx)
            plt.setp(ax.get_xticklabels(), visible=False)
        if i==3:
            ax = fig.add_subplot(4, 1, i+1, sharex=theAx)
            plt.xlabel('Date')

        plt.plot(short_date, pcPlot, color='b')
        
        if windowFlag:
            plt.plot(short_date, 
                     movingaverage(pcPlot, window), color='r')

        plt.axhline(0, color='k')
        plt.title(title)
        plt.ylabel(ylab)
    plt.show()

        ## Subset the dates to the last year of the dataset
    decade = [item for item in date_list if 
              item >= dt.datetime(2004, 6, 17) 
                  and item < dt.datetime(2014, 6, 17)]
    decadeIndices = [date_list.index(item) for item in decade]

    fig = plt.figure(figsize=(16, 16))
    ## Plot out the last year of the PCs to get a more detailed
    ## pattern for comparison to the R results
    for i in range(4):
        ylab = 'PC' + str(i+1)
        title = ylab + ' Time Series (1 decade)'

        pcPlot = pcs[np.array(decadeIndices),i]
        if i==0:
            theAx = fig.add_subplot(4, 1, i+1)
            plt.setp(theAx.get_xticklabels(), visible=False)
            theAx.set_xlabel('')
        if i>0 and i<3:
            ax = fig.add_subplot(4, 1, i+1, sharex=theAx)
            plt.setp(ax.get_xticklabels(), visible=False)
        if i==3:
            ax = fig.add_subplot(4, 1, i+1, sharex=theAx)
            plt.xlabel('Date')

        plt.plot(decade, pcPlot, color='b')
        if windowFlag:
            plt.plot(decade, 
                     movingaverage(pcPlot, window), color='r')
        plt.axhline(0, color='k')
        plt.title(title)
        plt.ylabel(ylab)
    plt.show()
Ejemplo n.º 9
0
#		print "\n Subset of 200..."
#		np.random.shuffle(filenames)
#		filenames=filenames[:200]


		data=load_regional(filenames,ny,nx)
		data=np.ma.masked_values(data,2.e+20)
		print "data loaded",data.shape
	
		# Set up info
		plt.set_cmap('RdBu')
		neofs=5
		nens=data.shape[0]
		nwanted=57
	
		solver=Eof(data)
		print 'set up EOF solver'
		pcs=solver.pcs(npcs=neofs,pcscaling=1)
		eofs=solver.eofs(neofs=neofs)
		varfrac=solver.varianceFraction(neigs=neofs)
		print 'calculated EOFs'
		print 'printing EOFs'
		for i in range(neofs):
			print 'EOF',i
			plt.clf()
			plot_region_pnw(eofs[i,:],lat_coord,lon_coord,0,-1,0,-1,'EOF'+str(i),varfrac[i])
		print "plotting histograms of PCs"
		for i in range(3):
			plt.clf()
			plt.hist(pcs[:,i],200,range=(-4,4),normed=1,alpha=0.4,label='pc'+str(i))
			plt.ylim([0,.6])
Ejemplo n.º 10
0
    # only keep region of interest?
    lato = lat
    lono = lon
    lat = lat[np.logical_and(lat >= latlims[0], lat <= latlims[1])]
    lon = lon[np.logical_and(lon >= lonlims[0], lon <= lonlims[1])]

    fldca1 = fldca1.reshape((oshape[0], len(lat), len(lon)))  # fldcash[0]/oshape[0])) # keep time dim
    fldca = fldca1
    fldpa1 = fldpa1.reshape((oshape[0], len(lat), len(lon)))
    fldpa = fldpa1

coslat = np.cos(np.deg2rad(lat)).clip(0.0, 1.0)  # why square root of cos lat? (better for EOF for some reason.)
wgts = np.sqrt(coslat)[..., np.newaxis]


solverc = Eof(fldca, weights=wgts)
if docorr:
    eof1c = solverc.eofsAsCorrelation(neofs=enum)
    eof1c = eof1c[enum - 1, ...]
else:
    eof1c = solverc.eofsAsCovariance(neofs=enum)
    eof1c = eof1c[enum - 1, ...]

eof1c = eof1c.squeeze()
eigsc = solverc.eigenvalues()
vexpc = eigsc[enum - 1] / eigsc.sum() * 100  # percent variance explained

fig, axs = plt.subplots(1, 4)
fig.set_size_inches(12, 5)
ax = axs[0]
cplt.kemmap(eof1c, lat, lon, type=type, title=sim + " control EOF" + str(enum), axis=ax, cmin=cmin, cmax=cmax)
Ejemplo n.º 11
0
# European/Atlantic domain (80W-40E, 20-90N).
filename = example_data_path('hgt_djf.nc')
ncin = Dataset(filename, 'r')
z_djf = ncin.variables['z'][:]
lons = ncin.variables['longitude'][:]
lats = ncin.variables['latitude'][:]
ncin.close()

# Compute anomalies by removing the time-mean.
z_djf_mean = z_djf.mean(axis=0)
z_djf = z_djf - z_djf_mean

# Create an EOF solver to do the EOF analysis. Square-root of cosine of
# latitude weights are applied before the computation of EOFs.
coslat = np.cos(np.deg2rad(lats)).clip(0., 1.)
wgts = np.sqrt(coslat)[..., np.newaxis]
solver = Eof(z_djf, weights=wgts)

# Retrieve the leading EOF, expressed as the covariance between the leading PC
# time series and the input SLP anomalies at each grid point.
eof1 = solver.eofsAsCovariance(neofs=1)

# Plot the leading EOF expressed as covariance in the European/Atlantic domain.
m = Basemap(projection='ortho', lat_0=60., lon_0=-20.)
x, y = m(*np.meshgrid(lons, lats))
m.contourf(x, y, eof1.squeeze(), cmap=plt.cm.RdBu_r)
m.drawcoastlines()
plt.title('EOF1 expressed as covariance', fontsize=16)

plt.show()