def create_monthly_intvar(run_type, ref_start, ref_end, n_pcs=22, run_n=400):
    # load in the PCs and EOFs
    histo_sy = 1899
    histo_ey = 2010
    
#    monthly_pc_fname = get_HadISST_monthly_residual_PCs_fname(histo_sy, histo_ey, run_n)
#    monthly_pcs = load_data(monthly_pc_fname)
#    monthly_eof_fname = get_HadISST_monthly_residual_EOFs_fname(histo_sy, histo_ey, run_n)
#    monthly_eofs = load_sst_data(monthly_eof_fname, "sst")

    monthly_residuals_fname = get_HadISST_monthly_residuals_fname(histo_sy, histo_ey, run_n)
    # open netcdf_file
    fh = netcdf_file(monthly_residuals_fname, 'r')
    attrs = fh.variables["sst"]._attributes
    mv = attrs["_FillValue"]
    var = fh.variables["sst"]
    monthly_residuals = numpy.ma.masked_equal(var[:], mv)

    # weights for reconstruction / projection
    coslat = numpy.cos(numpy.deg2rad(numpy.arange(89.5, -90.5, -1))).clip(0., 1.)
    wgts = numpy.sqrt(coslat)[..., numpy.newaxis]
    eof_solver = Eof(monthly_residuals, center=False, weights=wgts)
    monthly_pcs = eof_solver.pcs(npcs=n_pcs)
    monthly_eofs = eof_solver.eofs(neofs=n_pcs)
    
    # get the explanation of variance and calculate the scalar from it
    M = 1.0 / numpy.sum(eof_solver.varianceFraction(neigs=n_pcs))
    
    # get the number of months to predict the PCs for and create the storage
    histo_sy, histo_ey, rcp_sy, rcp_ey = get_start_end_periods()
    n_mnths = 12*(rcp_ey - histo_sy)
    predicted_pcs = numpy.zeros([n_mnths+12, n_pcs], "f")

    # fit an AR process to the first ~20 pcs
    for pc in range(0, n_pcs):
        # create the model
        arn = ARN(monthly_pcs[:,pc].squeeze())
        # fit the model to the data
        res = arn.fit()
        arp = res.k_ar
        # create a timeseries of predicted values
        predicted_pcs[:,pc] = M*arn.predict(res.params, noise='all', dynamic=True, start=arp, end=n_mnths+arp+11)

    # reconstruct the field and return
    monthly_intvar = reconstruct_field(predicted_pcs, monthly_eofs[:n_pcs], n_pcs, wgts)
    return monthly_intvar
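# reconstruct_field above is a project helper that is not shown on this page. Below is a
# minimal sketch of what such a PC/EOF reconstruction typically looks like; the name
# reconstruct_field_sketch, the argument order and the assumed shapes
# (pcs: (ntime, n_pcs), eofs: (n_pcs, nlat, nlon), wgts: (nlat, 1)) are assumptions,
# not the project's actual implementation.
import numpy

def reconstruct_field_sketch(pcs, eofs, n_pcs, wgts):
    # sum the outer product of each PC timeseries with its EOF pattern,
    # then undo the sqrt(cos(lat)) weighting applied before the EOF analysis
    ntime = pcs.shape[0]
    nlat, nlon = eofs.shape[1], eofs.shape[2]
    field = numpy.zeros([ntime, nlat, nlon], eofs.dtype)
    for k in range(n_pcs):
        field += pcs[:, k, numpy.newaxis, numpy.newaxis] * eofs[k]
    return field / wgts  # wgts broadcasts over the longitude dimension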
Example n. 2
def eof_computation(var, varunits, lat, lon):
    #----------------------------------------------------------------------------------------
    print(
        '____________________________________________________________________________________________________________________'
    )
    print('Computing the EOFs and PCs')
    #----------------------------------------------------------------------------------------
    # EOF analysis of a data array with spatial dimensions that
    # represent latitude and longitude with weighting. In this example
    # the data array is dimensioned (ntime, nlat, nlon), and in order
    # for the latitude weights to be broadcastable to this shape, an
    # extra length-1 dimension is added to the end:
    weights_array = np.sqrt(np.cos(np.deg2rad(lat)))[:, np.newaxis]

    start = datetime.datetime.now()
    solver = Eof(var, weights=weights_array)
    end = datetime.datetime.now()
    print('EOF computation took me %s' % (end - start))

    #ALL VARIANCE FRACTIONS
    varfrac = solver.varianceFraction()
    acc = np.cumsum(varfrac * 100)

    #------------------------------------------PCs unscaled  (case 0 of scaling)
    pcs_unscal0 = solver.pcs()
    #------------------------------------------EOFs unscaled  (case 0 of scaling)
    eofs_unscal0 = solver.eofs()

    #------------------------------------------PCs scaled  (case 1 of scaling)
    pcs_scal1 = solver.pcs(pcscaling=1)

    #------------------------------------------EOFs scaled (case 2 of scaling)
    eofs_scal2 = solver.eofs(eofscaling=2)

    return solver, pcs_scal1, eofs_scal2, pcs_unscal0, eofs_unscal0, varfrac
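# A hedged usage sketch for eof_computation above; the grid, the random test field and the
# variable names below are assumptions for illustration, not taken from the original script.
import numpy as np
lat = np.arange(-89., 90., 2.)                     # hypothetical 2-degree grid
lon = np.arange(0., 360., 2.)
var = np.random.randn(120, lat.size, lon.size)     # 120 monthly fields (ntime, nlat, nlon)
solver, pcs_scal1, eofs_scal2, pcs_unscal0, eofs_unscal0, varfrac = \
    eof_computation(var, 'K', lat, lon)
print('variance explained by EOF1: %.1f%%' % (varfrac[0] * 100))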
def calc_HadISST_residual_EOFs(histo_sy, histo_ey, run_n):
    # load the already calculated residuals
    resid_fname = get_HadISST_residuals_fname(histo_sy, histo_ey, run_n)
    # open netcdf_file
    fh = netcdf_file(resid_fname, 'r')
    lats_var = fh.variables["latitude"]
    lons_var = fh.variables["longitude"]
    attrs = fh.variables["sst"]._attributes
    mv = attrs["_FillValue"]
    var = fh.variables["sst"]
    sst_data = numpy.ma.masked_equal(var[:], mv)

    # calculate the EOFs and PCs
    # take the eofs
    coslat = numpy.cos(numpy.deg2rad(lats_var[:])).clip(0., 1.)
    wgts = numpy.sqrt(coslat)[..., numpy.newaxis]
    eof_solver = Eof(sst_data, center=False, weights=wgts)
    pcs = eof_solver.pcs(npcs=None)
    eofs = eof_solver.eofs(neofs=None)

    # get the output names
    out_eofs_fname = get_HadISST_residual_EOFs_fname(histo_sy, histo_ey, run_n)
    out_pcs_fname  = get_HadISST_residual_PCs_fname(histo_sy, histo_ey, run_n)
    
    # save the eofs and pcs
    save_3d_file(out_eofs_fname, eofs, attrs, lats_var, lons_var)
    save_pcs(out_pcs_fname, pcs, attrs)
    fh.close()
Example n. 4
def eofs_as(dat):
    A = climatologia_xarray(dat['curl']).values
    global land
    EC, WC, land = get_coasts(dat.lat, dat.lon)

    msk = np.empty(np.shape(A))
    for i in range(0, len(A[:,0,0])):
        msk[i,:,:] = land
    B = np.ma.array(A, mask=msk)
    from get_eddof import get_eddof
    edof = np.empty([len(dat.lat), len(dat.lon)])
    for i in range(0, len(dat.lat)):
        for j in range(0, len(dat.lon)):
            if msk[0,i,j] == False:
                edof[i,j] = get_eddof(B[:,i,j])
            else:
                edof[i,j] = np.nan

    dof = int(np.nanmean(edof))
    coslat = np.cos(np.deg2rad(dat.lat.values)).clip(0., 1.)
    wgts = np.sqrt(coslat)[..., np.newaxis]
    solver = Eof(B, center=True, weights=wgts, ddof=dof)

    eof = solver.eofs(neofs=10, eofscaling=2)
    pc = solver.pcs(npcs=10, pcscaling=1)
    varfrac = solver.varianceFraction()
    eigvals = solver.eigenvalues()

    x, y = np.meshgrid(dat.lon, dat.lat)

    return eof, pc, varfrac, x, y, edof
def calc_EOF2D(anom, nplat, coslat, varcode):

    # apply sqrt cos latitude weighting
    wgts = np.sqrt(coslat)
    wgts = wgts[:, np.newaxis]

    # leading EOF
    solver = Eof(anom, weights=wgts)
    eof1 = solver.eofs(neofs=1, eofscaling=0)[0]
    if varcode == 'PSL':
        if eof1[np.where(nplat >= 68)[0][0], 0] > 0:  # PSL
            eof1 = -eof1
    elif varcode == 'Z3':
        if eof1[np.where(nplat >= 75)[0][0], 0] > 0:  # Z3
            eof1 = -eof1
    elif varcode == 'U':
        if eof1[np.where(nplat >= 60)[0][0], 0] < 0:  # U
            eof1 = -eof1

    # leading principal component
    PC1 = np.empty([anom.shape[0]])
    for itime in range(anom.shape[0]):
        PC1[itime] = np.dot(anom[itime, :, :].flatten(), eof1.flatten())

    return (eof1, PC1)
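# A hedged usage sketch for calc_EOF2D above; the anomaly field is random and the latitude
# band is an assumption, chosen only so that the PSL sign check (nplat >= 68) has data to use.
import numpy as np
nplat = np.arange(20., 90.5, 1.0)                  # hypothetical NH latitudes, 20N-90N
coslat = np.cos(np.deg2rad(nplat)).clip(0., 1.)
anom = np.random.randn(240, nplat.size, 144)       # (ntime, nlat, nlon) PSL anomalies
eof1, PC1 = calc_EOF2D(anom, nplat, coslat, 'PSL')
print(eof1.shape, PC1.shape)                       # (nlat, nlon) pattern, (ntime,) index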
Example n. 6
def smooth(variable, window) :
    from axis import Axes, TimeAxis
    from variable import Variable
    if len(variable.shape) > 1 :
        raise NotImplementedError    
    try :
        variable.dts
    except :
        raise NotImplementedError    
    if window%2 == 0 :
        raise NotImplementedError    
    mask = np.ones(window)
    #mask[int(window/2)] = 1    
    mask /= window*1.0
    newAxes = Axes()
    newAxes['time'] = TimeAxis(variable.dts[int(window/2):-int(window/2)])
    return Variable(
            data = np.convolve(variable.data, mask, mode='valid'),
            axes = newAxes,
            metadata = variable.metadata)
Example n. 7
def eof(variable) :
    from eofs.standard import Eof
    wgts = np.cos(variable.lats*np.pi/180)**0.5
    solver = Eof(variable.data, weights = wgts[:, None])
    eof1 = solver.eofs(eofscaling=2, neofs=1)
    print(solver.varianceFraction(neigs=1)[0]*100, '%')
    output = variable[0].empty()
    output.data = eof1[0]
    return output
def reconstruct_data(arr, neofs=16):
    if type(neofs) == int:
        neofs = [neofs]

    solver = Eof(arr, center=False)
    for n in neofs: 
        reconstructed = solver.reconstructedField(neofs=n)
        pcs = solver.pcs(npcs=n)
        eofs = solver.eofs(neofs=n)
        yield reconstructed, pcs, eofs
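# A hedged usage sketch for the reconstruct_data generator above; the 2-D data matrix is
# random and only illustrates the (time, space) layout the Eof solver expects.
import numpy as np
arr = np.random.randn(100, 500)                    # hypothetical 100 times x 500 grid points
for reconstructed, pcs, eofs in reconstruct_data(arr, neofs=[4, 8, 16]):
    err = np.abs(arr - reconstructed).max()
    print(pcs.shape, eofs.shape, 'max reconstruction error: %.3g' % err)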
Example n. 9
def create_monthly_intvar(run_type, ref_start, ref_end, n_pcs=22, run_n=400):
    # load in the PCs and EOFs
    histo_sy = 1899
    histo_ey = 2010

    #    monthly_pc_fname = get_HadISST_monthly_residual_PCs_fname(histo_sy, histo_ey, run_n)
    #    monthly_pcs = load_data(monthly_pc_fname)
    #    monthly_eof_fname = get_HadISST_monthly_residual_EOFs_fname(histo_sy, histo_ey, run_n)
    #    monthly_eofs = load_sst_data(monthly_eof_fname, "sst")

    monthly_residuals_fname = get_HadISST_monthly_residuals_fname(
        histo_sy, histo_ey, run_n)
    # open netcdf_file
    fh = netcdf_file(monthly_residuals_fname, 'r')
    attrs = fh.variables["sst"]._attributes
    mv = attrs["_FillValue"]
    var = fh.variables["sst"]
    monthly_residuals = numpy.ma.masked_equal(var[:], mv)

    # weights for reconstruction / projection
    coslat = numpy.cos(
        numpy.deg2rad(numpy.arange(89.5, -90.5, -1))).clip(0., 1.)
    wgts = numpy.sqrt(coslat)[..., numpy.newaxis]
    eof_solver = Eof(monthly_residuals, center=False, weights=wgts)
    monthly_pcs = eof_solver.pcs(npcs=n_pcs)
    monthly_eofs = eof_solver.eofs(neofs=n_pcs)

    # get the explanation of variance and calculate the scalar from it
    M = 1.0 / numpy.sum(eof_solver.varianceFraction(neigs=n_pcs))

    # get the number of months to predict the PCs for and create the storage
    histo_sy, histo_ey, rcp_sy, rcp_ey = get_start_end_periods()
    n_mnths = 12 * (rcp_ey - histo_sy)
    predicted_pcs = numpy.zeros([n_mnths + 12, n_pcs], "f")

    # fit an AR process to the first ~20 pcs
    for pc in range(0, n_pcs):
        # create the model
        arn = ARN(monthly_pcs[:, pc].squeeze())
        # fit the model to the data
        res = arn.fit()
        arp = res.k_ar
        # create a timeseries of predicted values
        predicted_pcs[:, pc] = M * arn.predict(res.params,
                                               noise='all',
                                               dynamic=True,
                                               start=arp,
                                               end=n_mnths + arp + 11)

    # reconstruct the field and return
    monthly_intvar = reconstruct_field(predicted_pcs, monthly_eofs[:n_pcs],
                                       n_pcs, wgts)
    return monthly_intvar
Example n. 10
    def eof(in_bands):
        data = np.array([in_bands[i].data for i in range(len(in_bands))])

        #take eof over time dimension
        solver = Eof(data)

        eof1 = solver.eofs(neofs=1)[0, :]
        cube = in_bands[0].copy()
        cube.data = eof1

        pc1 = solver.pcs(pcscaling=1, npcs=1)[:, 0]
        var_frac = solver.varianceFraction(neigs=1)[0]
        return cube, pc1, var_frac
def calculate_IOBM(data, lats, lons, times, t_units, calendar):
    """ Calculate the Indian Ocean basin mode as the first EOF over the region 
    20S-20N, 40E-110E.
    See Yang et al (2007) doi:10.1029/2006GL028571"""
    data[np.abs(data) > 1e3] = np.nan
    annual_cycle_removed = remove_annual_cycle(data, times, t_units, calendar)
    lat_min, lat_max = -20, 20
    lon_min, lon_max = 40, 110
    lat_mask = (lats >= lat_min) & (lats <= lat_max)
    lon_mask = (lons >= lon_min) & (lons <= lon_max)
    IO_SST = annual_cycle_removed[:, lat_mask, :][:, :, lon_mask]
    coslat = np.cos(np.deg2rad(lats[lat_mask]))
    wgts = np.sqrt(coslat)[..., np.newaxis]
    solver = Eof(IO_SST, weights=wgts)
    IOBM = solver.pcs(npcs=1, pcscaling=1).flatten()
    EOF1 = solver.eofs(neofs=1)[0, :, :]
    if np.nanmean(EOF1) < 0: IOBM = -IOBM
    IOBM = (IOBM - np.mean(IOBM)) / np.std(IOBM)
    return IOBM
def calculate_IPO(data, lats, lons, times, t_units, calendar):
    """ Calculate the Inter-decadal Pacific Oscillation index 
    Calculated as the first EOF of SST 60S to 60N over the
    Pacific  """
    data[np.abs(data) > 1e3] = np.nan  # set unreasonably high values to NaN
    annual_cycle_removed = remove_annual_cycle(data, times, t_units, calendar)
    lat_min, lat_max = -60, 60
    lon_min, lon_max = 120, 270
    lat_mask = (lats >= lat_min) & (lats <= lat_max)
    lon_mask = (lons >= lon_min) & (lons <= lon_max)
    Pacific_SST = annual_cycle_removed[:, lat_mask, :][:, :, lon_mask]
    coslat = np.cos(np.deg2rad(lats[lat_mask]))
    wgts = np.sqrt(coslat)[..., np.newaxis]
    solver = Eof(Pacific_SST, weights=wgts)
    IPO = solver.pcs(npcs=1, pcscaling=1).flatten()
    EOF1 = solver.eofs(neofs=1)[0, :, :]
    if np.nanmean(EOF1) < 0: IPO = -IPO
    IPO = (IPO - np.mean(IPO)) / np.std(IPO)
    return IPO
def calculate_PDO(data, lats, lons, times, t_units, calendar):
    """ Calculate the Pacific Decadal Oscillation index as the first PC of SST
    between 20N and 70N
    See Newman et al (2016) doi:10.1175/JCLI-D-15-0508.1"""
    data[np.abs(data) > 1e3] = np.nan  # set unreasonably high values to NaN
    global_mean_removed = data - global_mean(data, lats).reshape(
        times.shape[0], 1, 1)
    annual_cycle_removed = remove_annual_cycle(global_mean_removed, times,
                                               t_units, calendar)
    lat_min, lat_max = 20, 70
    lon_min, lon_max = 120, 270
    lat_mask = (lats >= lat_min) & (lats <= lat_max)
    lon_mask = (lons >= lon_min) & (lons <= lon_max)
    N_Pacific_SST = annual_cycle_removed[:, lat_mask, :][:, :, lon_mask]
    coslat = np.cos(np.deg2rad(lats[lat_mask]))
    wgts = np.sqrt(coslat)[..., np.newaxis]
    solver = Eof(N_Pacific_SST, weights=wgts)
    EOF1 = solver.eofs(neofs=1)[0, :, :]
    PDO = solver.pcs(npcs=1, pcscaling=1).flatten()
    if np.nanmean(EOF1[:, lons[lon_mask] > 210]) < 0: PDO = -PDO
    PDO = (PDO - np.mean(PDO)) / np.std(PDO)
    return PDO
def calc_HadISST_monthly_residual_EOFs(histo_sy, histo_ey, ref_start, ref_end, run_n, n_eofs=22):
    # load the already calculated residuals
    resid_fname = get_HadISST_monthly_residuals_fname(histo_sy, histo_ey, run_n)
    # note that we don't have to subtract the annual cycle any more as the
    # residuals are with respect to a smoothed version of the monthly ssts
    
    resid_mon_fh = netcdf_file(resid_fname, 'r')
    sst_var = resid_mon_fh.variables["sst"]
    lats_var = resid_mon_fh.variables["latitude"]
    lons_var = resid_mon_fh.variables["longitude"]
    attrs = sst_var._attributes
    mv = attrs["_FillValue"]
    ssts = numpy.array(sst_var[:])
    sst_resids = numpy.ma.masked_less(ssts, -1000)
    
    # calculate the EOFs and PCs
    # take the eofs
    coslat = numpy.cos(numpy.deg2rad(lats_var[:])).clip(0., 1.)
    wgts = numpy.sqrt(coslat)[..., numpy.newaxis]
    eof_solver = Eof(sst_resids, center=True, weights=wgts)
    pcs = eof_solver.pcs(npcs=n_eofs)
    eofs = eof_solver.eofs(neofs=n_eofs)
    varfrac = eof_solver.varianceFraction(neigs=n_eofs)
    evs = eof_solver.eigenvalues(neigs=n_eofs)
    evs = evs.reshape([1,evs.shape[0]])
    print(evs.shape)
    
    # get the output names
    out_eofs_fname = get_HadISST_monthly_residual_EOFs_fname(histo_sy, histo_ey, run_n)
    out_pcs_fname  = get_HadISST_monthly_residual_PCs_fname(histo_sy, histo_ey, run_n)
    out_evs_fname  = get_HadISST_monthly_residual_EVs_fname(histo_sy, histo_ey, run_n)

    # save the eofs and pcs
    save_3d_file(out_eofs_fname, eofs, attrs, lats_var, lons_var)
    out_pcs = pcs.reshape([pcs.shape[0],1,pcs.shape[1]])
    save_pcs(out_pcs_fname, out_pcs, attrs)
    save_eigenvalues(out_evs_fname, evs, attrs)
    resid_mon_fh.close()
Example n. 15
    def PCA(self, field_name):

        field_name = field_name
        start_interv = self.start_pca
        end_interv = self.end_pca
        observationPeriod = 'data_' + str(start_interv) + '_to_' + str(end_interv)
        modelData = np.load(self.directory_data + '/' + field_name + '_' + observationPeriod + '.npy')

        # Velocity is a 3D vector and needs to be reshaped before the PCA
        if 'Velocity' in field_name:
            modelData = np.reshape(modelData, (modelData.shape[0], modelData.shape[1] * modelData.shape[2]), order='F')

        # Standardise the data with mean 0
        meanData = np.nanmean(modelData, 0)
        stdData = np.nanstd(modelData)
        modelDataScaled = (modelData - meanData) / stdData

        #PCA solver
        solver = Eof(modelDataScaled)

        # Principal Components time-series
        pcs = solver.pcs()
        # Projection
        eof = solver.eofs()
        # Cumulative variance
        varianceCumulative = np.cumsum(solver.varianceFraction())

        np.save(self.directory_data + '/' + 'pcs_' + field_name + '_' + observationPeriod,
                pcs)
        np.save(self.directory_data + '/' + 'eofs_' + field_name + '_' + observationPeriod,
                eof)
        np.save(self.directory_data + '/' + 'varCumulative_' + field_name + '_' + observationPeriod,
                varianceCumulative)
        np.save(self.directory_data + '/' + 'mean_' + field_name + '_' + observationPeriod,
                meanData)
        np.save(self.directory_data + '/' + 'std_' + field_name + '_' + observationPeriod,
                stdData)
Example n. 16
    # Open the file
    cnc = camgoda(cfull_path)
    tnc = camgoda(tfull_path)
    is3d, var, vname = cnc.ExtractData(variable, box)
    is3d, var, vname = tnc.ExtractData(variable, box)
    if n == 0:
        nlats, nlons = cnc.data.shape
        boxlat = cnc.boxlat
        boxlon = cnc.boxlon
        d = np.zeros(shape=(len(dates), nlats * nlons))
    d[n, :] = np.ndarray.flatten(tnc.data - cnc.data)

# Compute the amplitude timeseries and EOF spatial distributions of the data array
print "Computing the EOF..."
EOF = Eof(d, center=removeMeans)
eof = EOF.eofs(neofs=num_eofs)
pca = EOF.pcs(npcs=num_eofs, pcscaling=1)
varfrac = EOF.varianceFraction()
print "Finished!"

# Reshape F into a spatial grid
eof_grid = np.reshape(eof, (eof.shape[0], nlats, nlons))

# Make the maps
bmlon, bmlat = np.meshgrid(boxlon, boxlat)
southern_lat = boxlat[0]
northern_lat = boxlat[-1]
left_lon = boxlon[0]
right_lon = boxlon[-1]
if 0 in boxlon[1:-2]:  # if we cross the gml
    left_lon = boxlon[0] - 360
Example n. 17
            x0 = surfanom  # for the surface quantities I use the surface temperature anomaly
            #acos = acos[:, :n_alts]

            corrco = np.empty_like(acos[0])
            for i in range(acos[0].shape[0]):
                corrco[i] = np.corrcoef(x0, acos[:, i])[1, 0]

        cico, regrco, _, _ = npl.linearregre_coeff(x0, acos)

        regrcoef[(cco2, conam, 'R')] = corrco
        regrcoef[(cco2, conam, 'c')] = cico
        regrcoef[(cco2, conam, 'm')] = regrco

regrcoef['surfmean'] = np.mean(surftemps)
regrcoef['amean'] = atm_anom_mean
regrcoef['eof0'] = solver_anom.eofs(eofscaling=1)[0]
regrcoef['eof1'] = solver_anom.eofs(eofscaling=1)[1]

pickle.dump(regrcoef, open(cart_out_rep + 'regrcoef_v3.p', 'wb'))

# for conam in ['acoeff', 'bcoeff']:
#     fig = plt.figure()
#     for ialt, col in zip(range(n_alts), npl.color_set(n_alts)):
#         plt.plot(np.abs(regrcoef[(cco2, conam, 'R')][:, ialt]), alts, color = col)
#     #plt.xlim(-0.02, 0.02)
#     plt.title(conam + ' - rcorr')
#     fig.savefig(cartou + '{}_rcorr.pdf'.format(conam))

# the scalar products between the temp anomalies and the first eof of the temperature profile
dotprods = np.array([
    np.dot(te - atm_anom_mean,
Example n. 18
    if args.normalize:
        eof_data_std = np.std(eof_data, axis=1)
        eof_data = eof_data.T / eof_data_std
    else:
        #transpose so time is first dimension
        eof_data = eof_data.T

# Create an EOF solver to do the EOF analysis.  No weights.
# The first dimension is assumed to be time by the program... not true if a timeseries is of interest,
print("Solving for n={} modes".format(args.eof_num))
solver = Eof(eof_data, center=False)
pcs = solver.pcs(npcs=args.eof_num)
eigval = solver.eigenvalues(neigs=args.eof_num)
varfrac = solver.varianceFraction(neigs=args.eof_num)
eofs = solver.eofs(neofs=args.eof_num)
eofcorr = solver.eofsAsCorrelation(neofs=args.eof_num)
eofcov = solver.eofsAsCovariance(neofs=args.eof_num)
"""---------------------------------Report-----------------------------------"""
### Print Select Results to file
outfile = args.outfile + '.txt'
print("EOF Results:", file=open(outfile, "w"))
print("------------", file=open(outfile, "a"))

print("File path: {}".format("/".join(filename.split('/')[:-1])),
      file=open(outfile, "a"))
for key, filename in (files.items()):
    print("Files input: {}".format(filename.split('/')[-1]),
          file=open(outfile, "a"))

print("\n\n", file=open(outfile, "a"))
Example n. 19
                    axis=1)  # add weighting function (because of the latitude)
atemp_era5 = signal.detrend(
    hgt500_era5, axis=0)  # # linearly detrend 500 hPa geopotential height data
atemp_era5_pre = np.zeros((nt * ny, nlat, nlon))
for iy in np.arange(ny):
    atemp_era5_pre[iy * nt:iy * nt + nt] = atemp_era5[iy] * weightf[None, :, :]

### we did not use an n-day moving average as some other studies do, partly because the original goal for us (Jiacheng & Zhuo)
###   is to evaluate GEFS v12 reforecasts, where the reforecast length is usually 16 days, which makes it impossible
###   to apply an n-day moving average or other time filtering methods. 4 EOFs may already filter out some noisy signals.
### To be consistent with other researchers, one can add n-day moving average code above.
# EOF analysis
solver = Eof(atemp_era5_pre, center=True)
pcs = solver.pcs()
mid_eig = solver.eigenvalues()
mid_eofs = solver.eofs()
eofs = solver.eofs()

### Print explained variance when using 4 EOFs
#var_explained_era5= np.cumsum(mid_eig)/np.sum(np.sum(mid_eig))
#print(var_explained_era5[3])
#0.5316330300316366

reconstruction_era5 = solver.reconstructedField(4)  # Using the 4 leading EOFs to reconstruct the hgt500 field

### The Kmeans method needs a 2-D data format: number of days x horizontal fields
atemp_era5_post = np.zeros((ny * nt, nlat * nlon))
for i in np.arange(ny * nt):
    atemp_era5_post[i] = (reconstruction_era5[i]).flatten()
Example n. 20
#loop over seasons, selec data and performs boxplot
SAM_erai = np.zeros([5, 36])
SAM_s4 = np.zeros([5, len(hgt_s4.realiz.values)])
eof_erai = np.zeros([5, len(hgt_erai.latitude.values)])
eof_s4 = np.zeros([5, len(hgt_s4.latitude.values)])
sign_s4 = np.array([1, 1, 1, 1, -1])
sign_erai = np.array([1, -1, -1, -1, 1])
for i in np.arange(0, 5):
	aux = hgt_erai['z'].resample(time='QS-' + lmonth[i]).mean(dim='time',skipna=True)
	mes = datetime.datetime.strptime(lmonth[i], '%b').month
	aux = aux.sel(time= np.logical_and(aux['time.month'] == mes, aux['time.year']!=2002))
	X_zm = aux.mean(dim='longitude')
	X_an = X_zm - X_zm.mean(dim='time')
	solver = Eof(X_an.values)
	pcs = solver.pcs(npcs=1, pcscaling=1)
	eof_erai[i, :] = solver.eofs(neofs=1)[0,:]
	SAM_erai[i, :] = sign_erai[i] * pcs[:, 0]
	hgt_s4_smean = np.nanmean(np.nanmean(hgt_s4.z.values[i:i + 3, :, :, :], axis=0), axis=2)
	hgt_s4_smean = hgt_s4_smean - np.nanmean(hgt_s4_smean, axis=0)
	solver = Eof(hgt_s4_smean)
	pcs = solver.pcs(npcs=1, pcscaling=1)
	eof_s4[i, :] = solver.eofs(neofs=1)[0,:]
	SAM_s4[i, :] = sign_s4[i] * pcs[:, 0]

time = np.concatenate([np.arange(1981,2002), np.arange(2003,2018)])
ds = xr.Dataset({'SAM_index': xr.DataArray(SAM_erai, coords=[('season', season),('year', time)])})

ds.to_netcdf(RUTA + 'fogt/SAM_index_erai.nc4')

ds1 = xr.Dataset({'SAM_index': xr.DataArray(SAM_s4, coords=[('season', season),('realiz', np.arange(SAM_s4.shape[1]))])})
Example n. 21
curl_cfsr_clim = curl_cfsr.groupby('time.month').mean('time')

from eofs.standard import Eof

# ncep

coslat = np.cos(np.deg2rad(curl_ncep_clim.lat.values)).clip(0., 1.)
wgts = np.sqrt(coslat)[..., np.newaxis]
solver = Eof(curl_ncep_clim.values, weights=wgts)
var = solver.varianceFraction()

plt.bar(np.arange(0, len(var), 1), var * 100)
plt.show()

n = 1
eof_ncep = solver.eofs(neofs=n, eofscaling=2)
pc_ncep = solver.pcs(npcs=n, pcscaling=1)
vf_ncep = var[:n]

# cfsr

coslat = np.cos(np.deg2rad(curl_cfsr_clim.lat.values)).clip(0., 1.)
wgts = np.sqrt(coslat)[..., np.newaxis]
solver = Eof(curl_cfsr_clim.values, weights=wgts)
var = solver.varianceFraction()

plt.bar(np.arange(0, len(var), 1), var * 100)
plt.show()

n = 1
eof_cfsr = solver.eofs(neofs=n, eofscaling=2)
Example n. 22
def main(mFilepath, xFilepath, yFilepath, window, windowFlag=True):
    ## load in the data matrix as a numpy array
    m = np.loadtxt(mFilepath, dtype='float', delimiter=',', skiprows=1)
    # lon = np.loadtxt(xFilepath, dtype='float', delimiter=',', skiprows=1)
    # lat = np.loadtxt(yFilepath, dtype='float', delimiter=',', skiprows=1)
    # time =  np.arange('1958-01-01', '2014-09-22', dtype='datetime64')
    # years = range(1958, 2014)
    ## Create a list of dates spanning the study period
    base = dt.datetime(2014, 9, 21, 1, 1, 1, 1)
    dates = [base - dt.timedelta(days=x) for x in range(0, 20718)]
    date_list = [item for item in reversed(dates)]


    ## attempted to read in the raw data, but was struggling with
    ## the array dimensions
    # ncFiles = os.listdir(workspace)
    # slpList, lonList, latList, timeList = [], [], [], []
    # for fileIn in ncFiles:
    #     ncIn = Dataset(os.path.join(workspace, fileIn), 'r')
    #     slpList.append(ncIn.variables['slp'][:]/100)
    #     lonList.append(ncIn.variables['lon'][:])
    #     latList.append(ncIn.variables['lat'][:])
    #     timeList.append(ncIn.variables['time'][:])
    #     ncIn.close()

    # slp = np.array(slpList)
    # print(slp)
    # print(slp.shape)
    # # print(slp)
    # # print(np.shape(slp))

    ## create an EOF solver object and extract the first
    ## 4 EOFs and their associated PCs. Scaling can be 
    ## applied if desired
    ## http://ajdawson.github.io/eofs/api/eofs.standard.html#eofs.standard.Eof
    solver = Eof(m)
    eofs = solver.eofs(neofs=4, eofscaling=0)
    pcs = solver.pcs(npcs=4, pcscaling=0)

    # lon, lat = np.meshgrid(lon, lat)

    ## plot the EOFs as nongeographic data for simplicity
    fig = plt.figure(figsize=(10, 10))
    for i in range(4):
        ax = fig.add_subplot(2, 2, i+1)
        lab = 'EOF' + str(i + 1)
        main =  'Unscaled ' + lab

        eofPlot = eofs[i,].reshape(17, 32)

        plt.imshow(eofPlot, cmap=plt.cm.RdBu_r)
        plt.title(main)
        cb = plt.colorbar(orientation='horizontal', cmap=plt.cm.RdBu_r)
        cb.set_label(lab, fontsize=12)

        ## Basemap failure below.  Something with the y cell size went wrong
        # bm = Basemap(projection='cyl', llcrnrlat=16.17951, urcrnrlat=68.48459,
        #              llcrnrlon=-176.0393, urcrnrlon=-98.07901, resolution='c')

        # # bm.contourf(x, y, eof1.squeeze(), clevs, cmap=plt.cm.RdBu_r)
        # bm.drawcoastlines()
        # bm.drawstates()
        # im = bm.pcolormesh(lon, lat, eofPlot, cmap=plt.cm.RdBu_r, latlon=True)
        # # bm.fillcontinents(color='coral', lake_color='aqua')
        # bm.drawparallels(np.arange(-90.,91.,15.))
        # bm.drawmeridians(np.arange(-180.,181.,30.))
        # # bm.drawmapboundary(fill_color='aqua')
        # cb = plt.colorbar(orientation='horizontal')
        # cb.set_label(lab, fontsize=12)
        # plt.title(main, fontsize=16)
        # plt.show()
    plt.show()
    ## Plot the PCs as a time series
    fig = plt.figure(figsize=(16, 16))
    for i in range(4):
        ylab = 'PC' + str(i+1)
        title = ylab + ' Time Series'

        pcPlot = pcs[:,i]
        if i==0:
            theAx = fig.add_subplot(4, 1, i+1)
            plt.setp(theAx.get_xticklabels(), visible=False)
            theAx.set_xlabel('')
        if i>0 and i<3:
            ax = fig.add_subplot(4, 1, i+1, sharex=theAx)
            plt.setp(ax.get_xticklabels(), visible=False)
        if i==3:
            ax = fig.add_subplot(4, 1, i+1, sharex=theAx)
            plt.xlabel('Date')

        plt.plot(date_list, pcPlot, color='b')
        if windowFlag:
            plt.plot(date_list, movingaverage(pcPlot, window), 
                     color='r', linestyle='-')
        plt.axhline(0, color='k')
        plt.title(title)
        plt.ylabel(ylab)
    plt.show()

    ## Subset the dates to the last year of the dataset
    short_date = [item for item in date_list if 
                  item >= dt.datetime(2013, 6, 17) 
                  and item < dt.datetime(2014, 6, 25)]
    indices = [date_list.index(item) for item in short_date]

    fig = plt.figure(figsize=(16, 16))
    ## Plot out the last year of the PCs to get a more detailed
    ## pattern for comparison to the R results
    for i in range(4):
        ylab = 'PC' + str(i+1)
        title = ylab + ' Time Series (1 year)'

        pcPlot = pcs[np.array(indices),i]
        if i==0:
            theAx = fig.add_subplot(4, 1, i+1)
            plt.setp(theAx.get_xticklabels(), visible=False)
            theAx.set_xlabel('')
        if i>0 and i<3:
            ax = fig.add_subplot(4, 1, i+1, sharex=theAx)
            plt.setp(ax.get_xticklabels(), visible=False)
        if i==3:
            ax = fig.add_subplot(4, 1, i+1, sharex=theAx)
            plt.xlabel('Date')

        plt.plot(short_date, pcPlot, color='b')
        
        if windowFlag:
            plt.plot(short_date, 
                     movingaverage(pcPlot, window), color='r')

        plt.axhline(0, color='k')
        plt.title(title)
        plt.ylabel(ylab)
    plt.show()

    ## Subset the dates to the last decade of the dataset
    decade = [item for item in date_list if 
              item >= dt.datetime(2004, 6, 17) 
                  and item < dt.datetime(2014, 6, 17)]
    decadeIndices = [date_list.index(item) for item in decade]

    fig = plt.figure(figsize=(16, 16))
    ## Plot out the last decade of the PCs to get a more detailed
    ## pattern for comparison to the R results
    for i in range(4):
        ylab = 'PC' + str(i+1)
        title = ylab + ' Time Series (1 decade)'

        pcPlot = pcs[np.array(decadeIndices),i]
        if i==0:
            theAx = fig.add_subplot(4, 1, i+1)
            plt.setp(theAx.get_xticklabels(), visible=False)
            theAx.set_xlabel('')
        if i>0 and i<3:
            ax = fig.add_subplot(4, 1, i+1, sharex=theAx)
            plt.setp(ax.get_xticklabels(), visible=False)
        if i==3:
            ax = fig.add_subplot(4, 1, i+1, sharex=theAx)
            plt.xlabel('Date')

        plt.plot(decade, pcPlot, color='b')
        if windowFlag:
            plt.plot(decade, 
                     movingaverage(pcPlot, window), color='r')
        plt.axhline(0, color='k')
        plt.title(title)
        plt.ylabel(ylab)
    plt.show()
Example n. 23
def calculate_correlations_and_pvalues(var_pairs, label_to_vname_to_season_to_yearlydata: dict, season_to_months: dict,
                                       region_of_interest_mask, lakes_mask=None, lats=None) -> dict:
    """

    :param var_pairs:
    :param label_to_vname_to_season_to_yearlydata:
    :param lats: needed for the weighting of the eof solver
    :return: {(vname1, vname2): {label: {season: [corr, pvalue]}}}}
    """
    res = {}
    for pair in var_pairs:
        pair = tuple(pair)

        res[pair] = {}

        for label in label_to_vname_to_season_to_yearlydata:

            res[pair][label] = {}
            for season in season_to_months:

                years_sorted = sorted(label_to_vname_to_season_to_yearlydata[label][pair[0]][season])

                v1_dict, v2_dict = [label_to_vname_to_season_to_yearlydata[label][pair[vi]][season] for vi in range(2)]
                v1 = np.array([v1_dict[y] for y in years_sorted])
                v2 = np.array([v2_dict[y] for y in years_sorted])

                r = np.zeros(v1.shape[1:]).flatten()
                p = np.ones_like(r).flatten()

                v1 = v1.reshape((v1.shape[0], -1))
                v2 = v2.reshape((v2.shape[0], -1))

                # for hles and ice fraction get the eof of the ice and correlate
                if pair == ("hles_snow", "lake_ice_fraction"):
                    # assume that v2 is the lake_ice_fraction
                    v_lake_ice = v2

                    positions_hles_region = np.where(region_of_interest_mask.flatten())[0]
                    positions_lakes = np.where(lakes_mask.flatten())[0]

                    v_lake_ice = v_lake_ice[:, positions_lakes]
                    # calculate anomalies
                    v_lake_ice = v_lake_ice - v_lake_ice.mean(axis=0)

                    weights = np.cos(np.deg2rad(lats.flatten()[positions_lakes])) ** 0.5

                    solver = Eof(v_lake_ice, weights=weights[..., np.newaxis])
                    print(label, solver.varianceFraction(neigs=10))


                    # use the module of the PC1 to make sure it has physical meaning
                    pc1_ice = solver.pcs(npcs=1)[:, 0]

                    # debug: plot eof
                    eofs = solver.eofs(neofs=1)

                    eof_2d = np.zeros_like(lats).flatten()
                    eof_2d[positions_lakes] = eofs[:, 0] * pc1_ice
                    eof_2d = eof_2d.reshape(lats.shape)

                    plt.figure()
                    im = plt.pcolormesh(eof_2d.T)
                    plt.colorbar(im)
                    plt.show()

                    if True:
                        raise Exception


                    # print(positions)
                    for i in positions_hles_region:
                        r[i], p[i] = pearsonr(v1[:, i], pc1_ice)

                else:

                    positions = np.where(region_of_interest_mask.flatten())

                    # print(positions)
                    for i in positions[0]:
                        r[i], p[i] = pearsonr(v1[:, i], v2[:, i])

                r.shape = region_of_interest_mask.shape
                p.shape = region_of_interest_mask.shape

                r = np.ma.masked_where(~region_of_interest_mask, r)
                p = np.ma.masked_where(~region_of_interest_mask, p)

                res[pair][label][season] = [r, p]

    return res
Example n. 24
if not os.path.exists(cartou): os.mkdir(cartou)

temps = [atm_pt[(atm, 'temp')][:n_alts] for atm in allatms]
temps = np.stack(temps)

temps_anom = np.stack([
    atm_pt[(atm, 'temp')][:n_alts] - np.mean(atm_pt[(atm, 'temp')][:n_alts])
    for atm in allatms
])
atm_anom_mean = np.mean(temps_anom, axis=0)

solver = Eof(temps)
solver_anom = Eof(temps_anom)

fig = plt.figure()
for i, eo in enumerate(solver.eofs()):
    plt.plot(eo, alts, label=i)
plt.legend()
fig.savefig(cartou + 'eofs_temps.pdf')

fig = plt.figure()
for i, eo in enumerate(solver_anom.eofs()):
    plt.plot(eo, alts, label=i)
plt.legend()
fig.savefig(cartou + 'eofs_temps_anom.pdf')

fig = plt.figure()
plt.bar(np.arange(6), solver.eigenvalues())
fig.savefig(cartou + 'eigenvalues_temps.pdf')

fig = plt.figure()
Example n. 25
dat = xarray.open_dataset(
    '/home/bock/Documents/tesis/datos/ncep2_atlsur_2009_2015.nc')
clim_nc = dat['curl'].groupby('time.month').mean('time').sel(
    lat=slice(-20, -40), lon=slice(-64, -22))

coslat = np.cos(np.deg2rad(clim_nc.lat.values)).clip(0., 1.)
wgts = np.sqrt(coslat)[..., np.newaxis]
solver = Eof(clim_nc.values, weights=wgts)
var = solver.varianceFraction()
plt.figure(1)
plt.bar(np.arange(0, len(var), 1), var * 100)
plt.show()
plt.close()
n = input('How many PCs to extract: ')
n = int(n)
eof = solver.eofs(neofs=n, eofscaling=2)
pc = solver.pcs(npcs=n, pcscaling=1)
vf = var[:n]

fig, ax = plotterchico(clim_nc.lat, clim_nc.lon, eof[0] * 1e7, cm.GMT_no_green,
                       np.arange(-0.5, 0.51, .1), '', '10$^{-7}$ Pa/m')
plt.savefig('/home/bock/Documents/tesis/resultados/figs/eof1_ncep.png',
            bbox_inches='tight')
plt.show()
fig, ax = plotterchico(clim_nc.lat, clim_nc.lon, eof[1] * 1e7, cm.GMT_no_green,
                       np.arange(-0.5, 0.51, .1), '', '10$^{-7}$ Pa/m')
plt.savefig('/home/bock/Documents/tesis/resultados/figs/eof2_ncep.png',
            bbox_inches='tight')
plt.show()

dat1 = xarray.open_dataset(
Example n. 26
def PCA_Analyze(name, framestart, framestop, destination, numberpca=None):
    """Completes unweighted and unscaled Principle Component Analyzis on data. 

    **Arguments:**

    *name*
            The complete name of the numpy array file from the directory where the processing program is
            found. Put as string (include quotes). Must be an npz file. Specifically, this is the numpy
            array file that has the UU,VV,WW data in it. Instead of putting the name of the numpy file
            (since there are a large number of them) input an asterisk (*).
            
    *framestart*
            The first frame number in the sequence of frames to be analyzed.

    *framestop*
            The last frame number in the sequence of frames to be analyzed.

    
    *destination*
            File location for the graph to be saved. Put in quotes. Example:
            'out/Vertical Velocity/arrays/another/graph.png'
            
    
    **Optional keyword arguments:**

    *numberpca*
            Number of valid eigenvalues/PCAs to be calculated. Automatically set to determine all of them.
            
   **Example:**
            PCA_Analyze('../out/velocity.npz',0,5,'../out/mvavgtur.png',numberpca=4)
                    
    """
    #####Creates Lists and Dictionaries to be used later#####
    UU = {}
    lUU = []
    VV = {}
    lVV = []

    #####Extracts numpy array data and puts it into the dictionaries for use in analysis#####
    for np_name in glob.glob(name):
        with np.load(np_name) as data:
            UU[re.findall(r'\d+', np_name)[-1]] = data['UU']
            VV[re.findall(r'\d+', np_name)[-1]] = data['VV']

    #####Takes the data from the dictionaries, sorts them, and then puts them into lists. Then turns the list into a numpy array.#####
    uframes = sorted(UU.keys())
    vframes = sorted(VV.keys())

    for i in uframes:
        u = UU[i]
        lUU.append(u)

    for i in vframes:
        v = VV[i]
        lVV.append(v)

    luu = np.asarray(lUU)

    lvv = np.asarray(lVV)

    #####Puts the U and V components into one complex array with the U as the real component and the V as the imaginary#####
    velgrid = luu + (1.j * lvv)

    #####PCA#####
    solver = Eof(velgrid[framestart:framestop, :, :])
    pca = solver.eofs(neofs=numberpca)
    eigen = solver.eigenvalues(neigs=numberpca)

    pca = np.array(pca)
    eigen = np.array([eigen])
    intermed = eigen[0].shape
    length = intermed[0]
    print(length)
    #####Graphs each PCA#####
    c = 0
    for i in range(length):
        UU = pca.real[i, :, :]
        VV = pca.imag[i, :, :]
        eig = np.array_str(eigen[0][i])
        (a, b) = pca[0].shape
        y, x = np.mgrid[0:a, 0:b]
        plt.figure()
        plt.streamplot(x, y, UU * -1., VV * -1., cmap='nipy_spectral')
        plt.suptitle("PCA Analysis. Associated Percent Variance: ")
        plt.title(eig, fontsize=10)
        plt.savefig(destination % i)
        plt.close()
        c += 1
Example n. 27

		data=load_regional(filenames,ny,nx)
		data=np.ma.masked_values(data,2.e+20)
		print "data loaded",data.shape
	
		# Set up info
		plt.set_cmap('RdBu')
		neofs=5
		nens=data.shape[0]
		nwanted=57
	
		solver=Eof(data)
		print('set up EOF solver')
		pcs=solver.pcs(npcs=neofs,pcscaling=1)
		eofs=solver.eofs(neofs=neofs)
		varfrac=solver.varianceFraction(neigs=neofs)
		print('calculated EOFs')
		print('printing EOFs')
		for i in range(neofs):
			print('EOF', i)
			plt.clf()
			plot_region_pnw(eofs[i,:],lat_coord,lon_coord,0,-1,0,-1,'EOF'+str(i),varfrac[i])
		print "plotting histograms of PCs"
		for i in range(3):
			plt.clf()
			plt.hist(pcs[:,i],200,range=(-4,4),density=True,alpha=0.4,label='pc'+str(i))
			plt.ylim([0,.6])
			plt.savefig(output_dir+'/histogram_pc'+str(i)+'.png')
		print "plotting mean and stdev of ensemble"
		plot_region_pnw(data[:].mean(0),lat_coord,lon_coord,0,-1,0,-1,'mean',data.mean())
Example n. 28
filename1 = 'sine_wave_data1.nc'
filename2 = '2D_bulls_eyes.nc'
a = Dataset(filename1, mode='r')
b = Dataset(filename2, mode='r')
dataset1 = xr.open_dataset(xr.backends.NetCDF4DataStore(a))
dataset2 = xr.open_dataset(xr.backends.NetCDF4DataStore(b))
sinData = dataset1['data'].T
sinData = (sinData - sinData.mean(axis=0)) / sinData.std(axis=0)
sinData = sinData.values
bullseyeData = dataset2['data']

#%% EOF analysis
solver = Eof(sinData)
eigenvalues = solver.eigenvalues()  # Get eigenvalues
EOFs = solver.eofs(eofscaling=0)  # Get EOFs
EOFs_reg = solver.eofsAsCorrelation()  # Get EOFs as correlation b/w PCs & orig data
PCs = solver.pcs(pcscaling=1)  # Get PCs

# Get variance explained and # of PCs
VarExplain = np.round(solver.varianceFraction() * 100, 1)
numPCs2Keep = cumSUM(VarExplain, 90)

# Calculate EOFs
EOF1 = EOFs[0, :] * np.sqrt(eigenvalues[0])  # Get EOF 1 & scale it
EOF2 = EOFs[1, :] * np.sqrt(eigenvalues[1])  # Get EOF 2 & scale it
EOF1_reg = EOFs_reg[0, :]
EOF2_reg = EOFs_reg[1, :]
stdPC1 = PCs[:, 0]
stdPC2 = PCs[:, 1]