def create_monthly_intvar(run_type, ref_start, ref_end, n_pcs=22, run_n=400):
    # load in the PCs and EOFs
    histo_sy = 1899
    histo_ey = 2010
    
#    monthly_pc_fname = get_HadISST_monthly_residual_PCs_fname(histo_sy, histo_ey, run_n)
#    monthly_pcs = load_data(monthly_pc_fname)
#    monthly_eof_fname = get_HadISST_monthly_residual_EOFs_fname(histo_sy, histo_ey, run_n)
#    monthly_eofs = load_sst_data(monthly_eof_fname, "sst")

    monthly_residuals_fname = get_HadISST_monthly_residuals_fname(histo_sy, histo_ey, run_n)
    # open netcdf_file
    fh = netcdf_file(monthly_residuals_fname, 'r')
    attrs = fh.variables["sst"]._attributes
    mv = attrs["_FillValue"]
    var = fh.variables["sst"]
    monthly_residuals = numpy.ma.masked_equal(var[:], mv)

    # weights for reconstruction / projection
    coslat = numpy.cos(numpy.deg2rad(numpy.arange(89.5, -90.5, -1))).clip(0., 1.)
    wgts = numpy.sqrt(coslat)[..., numpy.newaxis]
    eof_solver = Eof(monthly_residuals, center=False, weights=wgts)
    monthly_pcs = eof_solver.pcs(npcs=n_pcs)
    monthly_eofs = eof_solver.eofs(neofs=n_pcs)
    
    # get the explained variance fraction and calculate the scaling factor from it
    M = 1.0 / numpy.sum(eof_solver.varianceFraction(neigs=n_pcs))
    
    # get the number of months to predict the PCs for and create the storage
    histo_sy, histo_ey, rcp_sy, rcp_ey = get_start_end_periods()
    n_mnths = 12*(rcp_ey - histo_sy)
    predicted_pcs = numpy.zeros([n_mnths+12, n_pcs], "f")

    # fit an AR process to each of the first n_pcs PCs
    for pc in range(0, n_pcs):
        # create the model
        arn = ARN(monthly_pcs[:,pc].squeeze())
        # fit the model to the data
        res = arn.fit()
        arp = res.k_ar
        # create a timeseries of predicted values
        predicted_pcs[:,pc] = M*arn.predict(res.params, noise='all', dynamic=True, start=arp, end=n_mnths+arp+11)

    # reconstruct the field and return
    monthly_intvar = reconstruct_field(predicted_pcs, monthly_eofs[:n_pcs], n_pcs, wgts)
    return monthly_intvar
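`reconstruct_field` is a project-specific helper that is not shown in this snippet. As a rough guide only, here is a minimal sketch of what such a helper might do, assuming the EOFs came from sqrt(cos(latitude))-weighted data and the weighting should be undone on output; the name and signature are hypothetical:

import numpy

def reconstruct_field_sketch(pcs, eofs, n_pcs, wgts):
    # hypothetical stand-in for the reconstruct_field helper used above:
    # sum the outer product of each PC time series with its EOF pattern,
    # then divide out the sqrt(cos(lat)) weights applied to the input data
    field = numpy.zeros((pcs.shape[0],) + eofs.shape[1:], dtype=pcs.dtype)
    for k in range(n_pcs):
        field += pcs[:, k, numpy.newaxis, numpy.newaxis] * eofs[k]
    return field / wgts  # wgts has shape (nlat, 1) and broadcasts over longitude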
Example #2
def eof_computation(var, varunits, lat, lon):
    #----------------------------------------------------------------------------------------
    print(
        '____________________________________________________________________________________________________________________'
    )
    print('Computing the EOFs and PCs')
    #----------------------------------------------------------------------------------------
    # EOF analysis of a data array with spatial dimensions that
    # represent latitude and longitude with weighting. In this example
    # the data array is dimensioned (ntime, nlat, nlon), and in order
    # for the latitude weights to be broadcastable to this shape, an
    # extra length-1 dimension is added to the end:
    weights_array = np.sqrt(np.cos(np.deg2rad(lat)))[:, np.newaxis]

    start = datetime.datetime.now()
    solver = Eof(var, weights=weights_array)
    end = datetime.datetime.now()
    print('EOF computation took %s' % (end - start))

    #ALL VARIANCE FRACTIONS
    varfrac = solver.varianceFraction()
    acc = np.cumsum(varfrac * 100)

    #------------------------------------------PCs unscaled  (case 0 of scaling)
    pcs_unscal0 = solver.pcs()
    #------------------------------------------EOFs unscaled  (case 0 of scaling)
    eofs_unscal0 = solver.eofs()

    #------------------------------------------PCs scaled  (case 1 of scaling)
    pcs_scal1 = solver.pcs(pcscaling=1)

    #------------------------------------------EOFs scaled (case 2 of scaling)
    eofs_scal2 = solver.eofs(eofscaling=2)

    return solver, pcs_scal1, eofs_scal2, pcs_unscal0, eofs_unscal0, varfrac
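The broadcasting described in the comment above is easy to check in isolation; a minimal, self-contained sketch (all names illustrative):

import numpy as np

ntime, nlat, nlon = 5, 3, 4
field = np.random.randn(ntime, nlat, nlon)
lat = np.array([-45.0, 0.0, 45.0])
weights = np.sqrt(np.cos(np.deg2rad(lat)))[:, np.newaxis]  # shape (nlat, 1)
# (nlat, 1) broadcasts against the trailing (nlat, nlon) axes of the field
print((field * weights).shape)  # -> (5, 3, 4)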
Example #3
def eofs_as(dat):
    A = climatologia_xarray(dat['curl']).values
    global land
    EC, WC, land = get_coasts(dat.lat, dat.lon)

    msk = np.empty(np.shape(A))
    for i in range(0, len(A[:, 0, 0])):
        msk[i, :, :] = land
    B = np.ma.array(A, mask=msk)
    from get_eddof import get_eddof
    edof = np.empty([len(dat.lat), len(dat.lon)])
    for i in range(0, len(dat.lat)):
        for j in range(0, len(dat.lon)):
            if not msk[0, i, j]:
                edof[i,j] = get_eddof(B[:,i,j])
            else:
                edof[i,j] = np.nan

    dof = int(np.nanmean(edof))
    coslat = np.cos(np.deg2rad(dat.lat.values)).clip(0., 1.)
    wgts = np.sqrt(coslat)[..., np.newaxis]
    solver = Eof(B, center=True, weights=wgts, ddof=dof)

    eof = solver.eofs(neofs=10, eofscaling=2)
    pc = solver.pcs(npcs=10, pcscaling=1)
    varfrac = solver.varianceFraction()
    eigvals = solver.eigenvalues()

    x, y = np.meshgrid(dat.lon, dat.lat)

    return eof, pc, varfrac, x, y, edof
Example #4
    def compute_ipo(sst_anoms, years_pass=11, N=2.0):
        high = int(years_pass * 12.)
        B, A = signal.butter(N, N / high, btype='lowpass', output='ba')

        def filter_SST(x):
            if any(np.isnan(x)):
                z = x
            else:
                z = signal.filtfilt(B, A, x)
            return z

        sst_anoms['sst_filtered'] = (('time', 'lat', 'lon'),
                                     np.apply_along_axis(
                                         filter_SST, 0,
                                         sst_anoms['sst_masked'].data))

        lat = sst_anoms['lat'].values
        lon = sst_anoms['lon'].values
        lons, lats = np.meshgrid(lon, lat)
        coslat = np.cos(np.deg2rad(lat))
        wgts = np.sqrt(coslat)[..., np.newaxis]
        sst_anoms.load()
        X = sst_anoms['sst_filtered'].data
        solver = Eof(X, weights=wgts)
        eofs = solver.eofsAsCorrelation(neofs=5)
        pcs = solver.pcs(npcs=5, pcscaling=1)
        PCs = pd.DataFrame(pcs, index=sst_anoms['time'].to_index())
        PCs_monthly = solver.projectField(sst_anoms['sst_masked'].data, neofs=5)
        PCs_monthly = pd.DataFrame(PCs_monthly,
                                   index=sst_anoms['time'].to_index())
        return eofs, PCs, lons, lats, PCs_monthly
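For intuition about the filter design above: with monthly data the Nyquist frequency is 6 cycles/year, so N=2 and years_pass=11 give a normalized cutoff of N/high = 2/132, i.e. 1/11 cycles per year, and variability with periods shorter than about 11 years is damped. A minimal, self-contained check (synthetic series, illustrative names):

import numpy as np
from scipy import signal

t = np.arange(600) / 12.0                       # 50 years of monthly samples
x = np.sin(2 * np.pi * t / 20.0) + 0.5 * np.sin(2 * np.pi * t / 2.0)

years_pass, N = 11, 2
high = int(years_pass * 12.0)
B, A = signal.butter(N, N / high, btype='lowpass', output='ba')
x_filt = signal.filtfilt(B, A, x)               # keeps the 20-year oscillation, drops the 2-year one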
def calc_HadISST_residual_EOFs(histo_sy, histo_ey, run_n):
    # load the already calculated residuals
    resid_fname = get_HadISST_residuals_fname(histo_sy, histo_ey, run_n)
    # open netcdf_file
    fh = netcdf_file(resid_fname, 'r')
    lats_var = fh.variables["latitude"]
    lons_var = fh.variables["longitude"]
    attrs = fh.variables["sst"]._attributes
    mv = attrs["_FillValue"]
    var = fh.variables["sst"]
    sst_data = numpy.ma.masked_equal(var[:], mv)

    # calculate the EOFs and PCs
    coslat = numpy.cos(numpy.deg2rad(lats_var[:])).clip(0., 1.)
    wgts = numpy.sqrt(coslat)[..., numpy.newaxis]
    eof_solver = Eof(sst_data, center=False, weights=wgts)
    pcs = eof_solver.pcs(npcs=None)
    eofs = eof_solver.eofs(neofs=None)

    # get the output names
    out_eofs_fname = get_HadISST_residual_EOFs_fname(histo_sy, histo_ey, run_n)
    out_pcs_fname  = get_HadISST_residual_PCs_fname(histo_sy, histo_ey, run_n)
    
    # save the eofs and pcs
    save_3d_file(out_eofs_fname, eofs, attrs, lats_var, lons_var)
    save_pcs(out_pcs_fname, pcs, attrs)
    fh.close()
Example #6
def calculate_EAsia_rm_eofs(data,
                            lats,
                            lons,
                            lat_min=20,
                            lat_max=50,
                            lon_min=110,
                            lon_max=180):
    """ Calculates EOFs over the East Asian region.
    Regresses the principal components back onto the original data"""
    lat_mask = (lats >= lat_min) & (lats <= lat_max)
    lon_mask = (lons >= lon_min) & (lons <= lon_max)
    data_EAsia = data[:, lat_mask, :][:, :, lon_mask]
    # calculate EOFs
    coslat = np.cos(np.deg2rad(lats[lat_mask]))
    wgts = np.sqrt(coslat)[..., np.newaxis]
    solver = Eof(data_EAsia, weights=wgts)
    var_frac = solver.varianceFraction()
    pcs = solver.pcs(npcs=3, pcscaling=1)
    # regress first modes onto original data
    reg_pc1, pval_pc1 = regress_map.regress_map(pcs[:, 0],
                                                data,
                                                map_type='regress')
    reg_pc2, pval_pc2 = regress_map.regress_map(pcs[:, 1],
                                                data,
                                                map_type='regress')
    reg_pc3, pval_pc3 = regress_map.regress_map(pcs[:, 2],
                                                data,
                                                map_type='regress')
    return var_frac, reg_pc1, pval_pc1, reg_pc2, pval_pc2, reg_pc3, pval_pc3
def reconstruct_data(arr, neofs=16):
    if isinstance(neofs, int):
        neofs = [neofs]

    solver = Eof(arr, center=False)
    for n in neofs: 
        reconstructed = solver.reconstructedField(neofs=n)
        pcs = solver.pcs(npcs=n)
        eofs = solver.eofs(neofs=n)
        yield reconstructed, pcs, eofs
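Since reconstruct_data is a generator, one truncated reconstruction per requested mode count can be consumed lazily; a short usage sketch (the input array is illustrative):

import numpy as np

arr = np.random.randn(120, 40, 50)  # illustrative (time, lat, lon) data
for reconstructed, pcs, eofs in reconstruct_data(arr, neofs=[4, 8, 16]):
    # compare each truncated reconstruction against the original field
    rmse = np.sqrt(np.mean((reconstructed - arr) ** 2))
    print(pcs.shape[1], 'modes -> RMSE', rmse)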
Example #9
    def eof(in_bands):
        data = np.array([in_bands[i].data for i in range(len(in_bands))])

        # take EOFs over the time dimension
        solver = Eof(data)

        eof1 = solver.eofs(neofs=1)[0, :]
        cube = in_bands[0].copy()
        cube.data = eof1

        pc1 = solver.pcs(pcscaling=1, npcs=1)[:, 0]
        var_frac = solver.varianceFraction(neigs=1)[0]
        return cube, pc1, var_frac
Example #10
def get_EOF(data, order=1, mode='corr'):
    '''
    :param data: image data, sst or t300, [month, lon, lat]
    :param order: int
    return: eof_corr, eof_cova [order, lon, lat],
            pc [month, order]
    '''
    solver = Eof(data)
    if mode == 'corr':
        res = solver.eofsAsCorrelation(neofs=order)
    elif mode == 'cova':
        res = solver.eofsAsCovariance(neofs=order)
    elif mode == 'pc':
        res = solver.pcs(npcs=order, pcscaling=1)
    else:
        raise ValueError("mode must be 'corr', 'cova' or 'pc'")
    return res
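A short usage sketch (array shape and names are illustrative):

import numpy as np

sst = np.random.randn(120, 72, 24)          # (month, lon, lat) anomalies
eof1 = get_EOF(sst, order=1, mode='corr')   # leading EOF as a correlation map
pcs = get_EOF(sst, order=2, mode='pc')      # first two standardized PCs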
Example #11
def E_Cindex(SSTA, lat, lon):
    """
    E and C indices to define EP&CP
    two orthogonal axes are rotated 45° relative to the principal components of SSTA
    SSTA with (timme,lat,lon)
    """
    # tropical Pacific: 120E-280E, 30S-30N
    SSTA_TP = SSTA[:, (lat <= 30) & (lat >= -30), :]
    SSTA_TP = SSTA_TP[:, :, (lon <= 280) & (lon >= 120)]

    lat1 = lat[(lat <= 30) & (lat >= -30)]
    lon1 = lon[(lon <= 280) & (lon >= 120)]
    #EOF analysis and to get the first 2 pcs
    #coslat=np.cos(np.deg2rad(np.arange(-20,21,2)))
    solver = Eof(SSTA_TP[29:, :, :])
    pcs = solver.pcs(npcs=2, pcscaling=1)
    eof = solver.eofsAsCorrelation(neofs=2)
    a = eof[0, (lat1 <= 5) & (lat1 >= -5), :]
    b = eof[1, (lat1 <= 5) & (lat1 >= -5), :]

    if np.mean(a[:, (lon1 <= 240) & (lon1 >= 190)], (0, 1)) < 0:
        pcs[:, 0] = -pcs[:, 0]

    if np.mean(b[:, (lon1 <= 240) & (lon1 >= 190)], (0, 1)) > 0:
        pcs[:, 1] = -pcs[:, 1]

    # do the 45° rotation
    C_index = (pcs[:, 0] + pcs[:, 1]) / np.sqrt(2)
    E_index = (pcs[:, 0] - pcs[:, 1]) / np.sqrt(2)

    #find EP&CP years
    # =============================================================================
    #     CI_std=(C_index-np.mean(C_index))/np.std(C_index)
    #     EI_std=(E_index-np.mean(E_index))/np.std(E_index)
    # =============================================================================

    # =============================================================================
    #     cindex=pd.Series(C_index)
    #     eindex=pd.Series(E_index)
    #
    #
    #     #find EP&CP years
    #     CI_std=(cindex-cindex.rolling(window=30).mean())/cindex.rolling(window=30).std()
    #     EI_std=(eindex-eindex.rolling(window=30).mean())/eindex.rolling(window=30).std()
    # =============================================================================

    return C_index, E_index
def calculate_U_EOF(U,
                    SST,
                    THF,
                    lats_ua,
                    lons_ua,
                    lats_SST,
                    lons_SST,
                    lats_THF,
                    lons_THF,
                    lat_min=lat_min,
                    lat_max=lat_max,
                    lon_min=lon_min,
                    lon_max=lon_max,
                    npcs=3):
    """Function to select a given region and return the first few principal component time series
    then regress the pcs back onto the zonal wind and SST."""
    # select region
    lat_mask = (lats_ua >= lat_min) & (lats_ua <= lat_max)
    lon_mask = (lons_ua >= lon_min) & (lons_ua <= lon_max)
    #print(lats.shape,lons.shape,U.shape,lats[lat_mask].shape,lons[lon_mask].shape)
    U_region = U[:, lat_mask, :][:, :, lon_mask]
    U_climatology = np.mean(U, axis=0)

    # Calculate EOFs
    coslat = np.cos(np.deg2rad(lats_ua[lat_mask]))
    wgts = np.sqrt(coslat)[..., np.newaxis]
    solver = Eof(U_region, weights=wgts)
    pcs = solver.pcs(npcs=npcs, pcscaling=1)
    variance_fraction = solver.varianceFraction()

    # perform regressions
    regress_U = np.zeros([npcs, lats_ua.shape[0], lons_ua.shape[0]])
    regress_SST = np.zeros([npcs, lats_SST.shape[0], lons_SST.shape[0]])
    regress_THF = np.zeros([npcs, lats_THF.shape[0], lons_THF.shape[0]])
    for pc_number in np.arange(npcs):
        regress_U[pc_number, :, :] = regress_map(pcs[:, pc_number],
                                                 U,
                                                 map_type='corr')[0]
        regress_SST[pc_number, :, :] = regress_map(pcs[:, pc_number],
                                                   SST,
                                                   map_type='corr')[0]
        regress_THF[pc_number, :, :] = regress_map(pcs[:, pc_number],
                                                   THF,
                                                   map_type='corr')[0]

    return (pcs, regress_U, regress_SST, regress_THF,
            variance_fraction[:npcs], U_climatology)
def calculate_IOBM(data, lats, lons, times, t_units, calendar):
    """ Calculate the Indian Ocean basin mode as the first EOF over the region 
    20S-20N, 40E-110E.
    See Yang et al (2007) doi:10.1029/2006GL028571"""
    data[np.abs(data) > 1e3] = np.nan
    annual_cycle_removed = remove_annual_cycle(data, times, t_units, calendar)
    lat_min, lat_max = -20, 20
    lon_min, lon_max = 40, 110
    lat_mask = (lats >= lat_min) & (lats <= lat_max)
    lon_mask = (lons >= lon_min) & (lons <= lon_max)
    IO_SST = annual_cycle_removed[:, lat_mask, :][:, :, lon_mask]
    coslat = np.cos(np.deg2rad(lats[lat_mask]))
    wgts = np.sqrt(coslat)[..., np.newaxis]
    solver = Eof(IO_SST, weights=wgts)
    IOBM = solver.pcs(npcs=1, pcscaling=1).flatten()
    EOF1 = solver.eofs(neofs=1)[0, :, :]
    if np.nanmean(EOF1) < 0: IOBM = -IOBM
    IOBM = (IOBM - np.mean(IOBM)) / np.std(IOBM)
    return IOBM
def calculate_IPO(data, lats, lons, times, t_units, calendar):
    """ Calculate the Inter-decadal Pacific Oscillation index 
    Calculated as the first EOF of SST 60S to 60N over the
    Pacific  """
    data[np.abs(data) > 1e3] = np.nan  # set unreasonably high values to NaN
    annual_cycle_removed = remove_annual_cycle(data, times, t_units, calendar)
    lat_min, lat_max = -60, 60
    lon_min, lon_max = 120, 270
    lat_mask = (lats >= lat_min) & (lats <= lat_max)
    lon_mask = (lons >= lon_min) & (lons <= lon_max)
    Pacific_SST = annual_cycle_removed[:, lat_mask, :][:, :, lon_mask]
    coslat = np.cos(np.deg2rad(lats[lat_mask]))
    wgts = np.sqrt(coslat)[..., np.newaxis]
    solver = Eof(Pacific_SST, weights=wgts)
    IPO = solver.pcs(npcs=1, pcscaling=1).flatten()
    EOF1 = solver.eofs(neofs=1)[0, :, :]
    if np.nanmean(EOF1) < 0: IPO = -IPO
    IPO = (IPO - np.mean(IPO)) / np.std(IPO)
    return IPO
def calculate_PDO(data, lats, lons, times, t_units, calendar):
    """ Calculate the Pacific Decadal Oscillation index as the first PC of SST
    between 20N and 70N
    See Newman et al (2016) doi:10.1175/JCLI-D-15-0508.1"""
    data[np.abs(data) > 1e3] = np.nan  # set unreasonably high values to NaN
    global_mean_removed = data - global_mean(data, lats).reshape(
        times.shape[0], 1, 1)
    annual_cycle_removed = remove_annual_cycle(global_mean_removed, times,
                                               t_units, calendar)
    lat_min, lat_max = 20, 70
    lon_min, lon_max = 120, 270
    lat_mask = (lats >= lat_min) & (lats <= lat_max)
    lon_mask = (lons >= lon_min) & (lons <= lon_max)
    N_Pacific_SST = annual_cycle_removed[:, lat_mask, :][:, :, lon_mask]
    coslat = np.cos(np.deg2rad(lats[lat_mask]))
    wgts = np.sqrt(coslat)[..., np.newaxis]
    solver = Eof(N_Pacific_SST, weights=wgts)
    EOF1 = solver.eofs(neofs=1)[0, :, :]
    PDO = solver.pcs(npcs=1, pcscaling=1).flatten()
    if np.nanmean(EOF1[:, lons[lon_mask] > 210]) < 0: PDO = -PDO
    PDO = (PDO - np.mean(PDO)) / np.std(PDO)
    return PDO
def calc_HadISST_monthly_residual_EOFs(histo_sy, histo_ey, ref_start, ref_end, run_n, n_eofs=22):
    # load the already calculated residuals
    resid_fname = get_HadISST_monthly_residuals_fname(histo_sy, histo_ey, run_n)
    # note that we don't have to subtract the annual cycle any more as the
    # residuals are with respect to a smoothed version of the monthly ssts
    
    resid_mon_fh = netcdf_file(resid_fname, 'r')
    sst_var = resid_mon_fh.variables["sst"]
    lats_var = resid_mon_fh.variables["latitude"]
    lons_var = resid_mon_fh.variables["longitude"]
    attrs = sst_var._attributes
    mv = attrs["_FillValue"]
    ssts = numpy.array(sst_var[:])
    sst_resids = numpy.ma.masked_less(ssts, -1000)
    
    # calculate the EOFs and PCs
    coslat = numpy.cos(numpy.deg2rad(lats_var[:])).clip(0., 1.)
    wgts = numpy.sqrt(coslat)[..., numpy.newaxis]
    eof_solver = Eof(sst_resids, center=True, weights=wgts)
    pcs = eof_solver.pcs(npcs=n_eofs)
    eofs = eof_solver.eofs(neofs=n_eofs)
    varfrac = eof_solver.varianceFraction(neigs=n_eofs)
    evs = eof_solver.eigenvalues(neigs=n_eofs)
    evs = evs.reshape([1,evs.shape[0]])
    print(evs.shape)
    
    # get the output names
    out_eofs_fname = get_HadISST_monthly_residual_EOFs_fname(histo_sy, histo_ey, run_n)
    out_pcs_fname  = get_HadISST_monthly_residual_PCs_fname(histo_sy, histo_ey, run_n)
    out_evs_fname  = get_HadISST_monthly_residual_EVs_fname(histo_sy, histo_ey, run_n)

    # save the eofs and pcs
    save_3d_file(out_eofs_fname, eofs, attrs, lats_var, lons_var)
    out_pcs = pcs.reshape([pcs.shape[0],1,pcs.shape[1]])
    save_pcs(out_pcs_fname, out_pcs, attrs)
    save_eigenvalues(out_evs_fname, evs, attrs)
    resid_mon_fh.close()
Example #17
    def PCA(self, field_name):

        start_interv = self.start_pca
        end_interv = self.end_pca
        observationPeriod = 'data_' + str(start_interv) + '_to_' + str(end_interv)
        modelData = np.load(self.directory_data + '/' + field_name + '_' + observationPeriod + '.npy')

        # Velocity is a 3D vector and needs to be reshaped before the PCA
        if 'Velocity' in field_name:
            modelData = np.reshape(modelData, (modelData.shape[0], modelData.shape[1] * modelData.shape[2]), order='F')

        # Standardise the data with mean 0
        meanData = np.nanmean(modelData, 0)
        stdData = np.nanstd(modelData)
        modelDataScaled = (modelData - meanData) / stdData

        #PCA solver
        solver = Eof(modelDataScaled)

        # Principal Components time-series
        pcs = solver.pcs()
        # Projection
        eof = solver.eofs()
        # Cumulative variance
        varianceCumulative = np.cumsum(solver.varianceFraction())

        np.save(self.directory_data + '/' + 'pcs_' + field_name + '_' + observationPeriod,
                pcs)
        np.save(self.directory_data + '/' + 'eofs_' + field_name + '_' + observationPeriod,
                eof)
        np.save(self.directory_data + '/' + 'varCumulative_' + field_name + '_' + observationPeriod,
                varianceCumulative)
        np.save(self.directory_data + '/' + 'mean_' + field_name + '_' + observationPeriod,
                meanData)
        np.save(self.directory_data + '/' + 'std_' + field_name + '_' + observationPeriod,
                stdData)
Example #18
sst = ncin.variables['sst'][:]
lons = ncin.variables['longitude'][:]
lats = ncin.variables['latitude'][:]
ncin.close()

# Create an EOF solver to do the EOF analysis. Square-root of cosine of
# latitude weights are applied before the computation of EOFs.
coslat = np.cos(np.deg2rad(lats))
wgts = np.sqrt(coslat)[..., np.newaxis]
solver = Eof(sst, weights=wgts)

# Retrieve the leading EOF, expressed as the correlation between the leading
# PC time series and the input SST anomalies at each grid point, and the
# leading PC time series itself.
eof1 = solver.eofsAsCorrelation(neofs=1)
pc1 = solver.pcs(npcs=1, pcscaling=1)

# Plot the leading EOF expressed as correlation in the Pacific domain.
clevs = np.linspace(-1, 1, 11)
ax = plt.axes(projection=ccrs.PlateCarree(central_longitude=190))
fill = ax.contourf(lons, lats, eof1.squeeze(), clevs,
                   transform=ccrs.PlateCarree(), cmap=plt.cm.RdBu_r)
ax.add_feature(cfeature.LAND, facecolor='w', edgecolor='k')
cb = plt.colorbar(fill, orientation='horizontal')
cb.set_label('correlation coefficient', fontsize=12)
plt.title('EOF1 expressed as correlation', fontsize=16)

# Plot the leading PC time series.
plt.figure()
years = range(1962, 2012)
plt.plot(years, pc1, color='b', linewidth=2)
Example #19
def main():
    folder_path = "/HOME/huziy/skynet3_rech1/Netbeans Projects/Python/RPN/lake_effect_analysis_Obs_1980-2009"

    label_to_hles_dir = OrderedDict([
        ("Obs",
         Path(
             "/RESCUE/skynet3_rech1/huziy/Netbeans Projects/Python/RPN/lake_effect_analysis_Obs_1980-2009"
         )),
        ("CRCM5_NEMO",
         Path(
             "/RESCUE/skynet3_rech1/huziy/Netbeans Projects/Python/RPN/lake_effect_analysis_CRCM5_NEMO_1980-2009"
         )),
        ("CRCM5_HL",
         Path(
             "/RESCUE/skynet3_rech1/huziy/Netbeans Projects/Python/RPN/lake_effect_analysis_CRCM5_Hostetler_1980-2009"
         )),
        # ("CRCM5_NEMO_TT_PR", Path("/RESCUE/skynet3_rech1/huziy/Netbeans Projects/Python/RPN/lake_effect_analysis_CRCM5_NEMO_based_on_TT_PR_1980-2009"))
    ])

    label_to_line_style = {
        "Obs": "k.-",
        "CRCM5_NEMO": "r",
        "CRCM5_HL": "b",
        "CRCM5_NEMO_TT_PR": "g"
    }

    vname = "snow_fall"
    units = "cm"
    #vname = "lkeff_snowfall_days"
    #units = "days"
    npc = 1

    b = Basemap(lon_0=180,
                llcrnrlon=common_params.great_lakes_limits.lon_min,
                llcrnrlat=common_params.great_lakes_limits.lat_min,
                urcrnrlon=common_params.great_lakes_limits.lon_max,
                urcrnrlat=common_params.great_lakes_limits.lat_max,
                resolution="i")

    label_to_y_to_snfl = {}
    label_to_pc = {}

    label_to_eof = OrderedDict()
    label_to_varfraction = OrderedDict()

    mask = None

    plot_utils.apply_plot_params(font_size=12)

    fig = plt.figure()

    years = None
    lats = None
    lons = None
    the_mask = None
    for label, folder in label_to_hles_dir.items():

        y_to_snfl = {}
        y_to_snfldays = {}

        for the_file in folder.iterdir():
            if not the_file.name.endswith(".nc"):
                continue

            with Dataset(str(the_file)) as ds:
                print(ds)
                snfl = ds.variables[vname][:]
                year_current = ds.variables["year"][:]

                if mask is None:
                    lons, lats = [ds.variables[k][:] for k in ["lon", "lat"]]
                    lons[lons > 180] -= 360
                    mask = maskoceans(lons,
                                      lats,
                                      lons,
                                      inlands=True,
                                      resolution="i")

                y_to_snfl[year_current[0]] = snfl[0]

        years_ord = sorted(y_to_snfl)

        label_to_y_to_snfl[label] = y_to_snfl

        if years is None:
            years = years_ord

        data = np.ma.array([y_to_snfl[y] for y in years_ord])

        if the_mask is None:
            the_mask = data[0].mask

        solver = Eof(data)

        eof = solver.eofsAsCorrelation()
        # eof = solver.eofs(neofs=4)

        pc = solver.pcs(pcscaling=0)
        label_to_varfraction[label] = solver.varianceFraction()

        label_to_pc[label] = pc
        label_to_eof[label] = eof

        # change the signs of pcs and eofs
        if label not in ["CRCM5_HL"]:
            label_to_pc[label][:, 0] *= -1
            label_to_eof[label][0, :, :] *= -1

        if label in ["CRCM5_NEMO"]:
            label_to_pc[label][:, 1:] *= -1
            label_to_eof[label][1:, :, :] *= -1

        # save data for Diro
        print(pc.shape)
        df = pd.DataFrame(data=pc, index=years_ord)
        df.to_csv("{}_{}_pc.csv".format(vname, label))

        plt.plot(years_ord,
                 label_to_pc[label][:, 0].copy(),
                 label_to_line_style[label],
                 linewidth=2,
                 label=label)

    plt.legend(loc="upper left")

    plt.ylabel(units)
    plt.xlabel("Year")
    plt.xticks(years)

    plt.grid()
    plt.gcf().autofmt_xdate()
    plt.savefig(str(label_to_hles_dir["Obs"].joinpath("pc{}_{}.png".format(
        npc, vname))),
                bbox_inches="tight")

    plt.close(fig)

    # plot the eofs

    plot_utils.apply_plot_params(font_size=12, width_cm=30, height_cm=6)

    lons[lons < 0] += 360
    xx, yy = b(lons, lats)
    for eof_ind in range(3):
        col = 0

        fig = plt.figure()
        gs = GridSpec(1, len(label_to_eof), wspace=0.02)

        for label, eof_field in label_to_eof.items():

            ax = fig.add_subplot(gs[0, col])
            to_plot = eof_field[eof_ind]
            im = b.pcolormesh(xx,
                              yy,
                              to_plot,
                              cmap=cm.get_cmap("bwr", 10),
                              vmin=-0.25,
                              vmax=0.25,
                              ax=ax)
            cb = b.colorbar(im, extend="both")
            cb.ax.set_visible(col == len(label_to_eof) - 1)
            ax.set_title("{} (explains {:.2f}$\sigma^2$)".format(
                label, label_to_varfraction[label][eof_ind]))

            col += 1

            b.drawcoastlines(ax=ax)

        # fig.tight_layout()
        plt.savefig(str(label_to_hles_dir["Obs"].joinpath(
            "eof_raw_{}_{}.png".format(eof_ind + 1, vname))),
                    bbox_inches="tight",
                    dpi=300)
        plt.close(fig)
Example #20
def EP_CPindex(SSTA, lat, lon):
    """
    EP_CPindex method to define CP&EP
    FOR CP: regression of Nino1+2 SSTA associated with eastern warming is removed
    FOR EP: regression of Nino 4 SSTA associated with cetral warming is removed
    both EOF to find 1st PC time series (exceed on standard deviation-1 sigma)
    
    SSTA with (time,lat,lon) with masked values and nan
    """
    #Nino1+2:90W-80W(135-140),0-10S(45-50)
    ssta12 = SSTA[:, (lat <= 10) & (lat >= 0), :]
    Nino12 = np.ma.average(ssta12[:, :, (lon <= 280) & (lon >= 270)], (1, 2))
    Nino12 = np.ma.getdata(Nino12)

    #Nino4:160E-150W(80-105),5N-5S(43-47)
    ssta4 = SSTA[:, (lat <= 5) & (lat >= -5), :]
    Nino4 = np.ma.average(ssta4[:, :, (lon <= 210) & (lon >= 160)], (1, 2))
    Nino4 = np.ma.getdata(Nino4)

    # tropical Pacific: 120E-280E, 30S-30N
    SSTA_TP = SSTA[:, (lat <= 30) & (lat >= -30), :]
    SSTA_TP = SSTA_TP[:, :, (lon <= 280) & (lon >= 120)]

    lat1 = lat[(lat <= 30) & (lat >= -30)]
    lon1 = lon[(lon <= 280) & (lon >= 120)]

    SSTA_TP12 = np.zeros(SSTA_TP.shape)
    SSTA_TP4 = np.zeros(SSTA_TP.shape)

    for i in range(0, SSTA_TP.shape[1]):
        for j in range(0, SSTA_TP.shape[2]):
            k12, _, _, _, _ = stats.linregress(Nino12, SSTA_TP[:, i, j])
            SSTA_TP12[:, i, j] = SSTA_TP[:, i, j] - k12 * Nino12
            k4, _, _, _, _ = stats.linregress(Nino4, SSTA_TP[:, i, j])
            SSTA_TP4[:, i, j] = SSTA_TP[:, i, j] - k4 * Nino4

    #EOF analysis
    #coslat=np.cos(np.deg2rad(np.arange(-20,21,2)))
    #wgt=np.sqrt(coslat)[..., np.newaxis]
    solver12 = Eof(SSTA_TP12)
    eof12 = solver12.eofsAsCorrelation(neofs=1)
    PC12 = solver12.pcs(npcs=1, pcscaling=1)
    PC12 = PC12[:, 0]
    a = eof12[:, (lat1 <= 5) & (lat1 >= -5), :]
    if np.mean(a[:, :, (lon1 <= 240) & (lon1 >= 190)].squeeze(), (0, 1)) < 0:
        PC12 = -PC12

    solver4 = Eof(SSTA_TP4)
    eof4 = solver4.eofsAsCorrelation(neofs=1)
    PC4 = solver4.pcs(npcs=1, pcscaling=1)
    PC4 = PC4[:, 0]
    b = eof4[:, (lat1 <= 5) & (lat1 >= -5), :]

    if np.mean(b[:, :, (lon1 <= 240) & (lon1 >= 190)].squeeze(), (0, 1)) < 0:
        PC4 = -PC4

    # PC12 is for the CP definition and PC4 is for the EP definition

    # standardization alternatives, kept for reference:
# =============================================================================
#     pc12_std=(PC12-np.mean(PC12))/np.std(PC12)
#     pc4_std=(PC4-np.mean(PC4))/np.std(PC4)
# =============================================================================
# =============================================================================
#     pc12=pd.Series(PC12[:,0])
#     pc4=pd.Series(PC4[:,0])
#     pc12_std=(pc12-pc12.rolling(window=30).mean())/pc12.rolling(window=30).std()
#     pc4_std=(pc4-pc4.rolling(window=30).mean())/pc4.rolling(window=30).std()
# =============================================================================

    return PC12, PC4  #CP, EP
def calcSeasonalEOF(anomslp,years,year1,year2,monthind,eoftype,pctype):
    """
    Calculates EOF over defined seasonal period
    
    Parameters
    ----------
    anomslp : 4d array [year,month,lat,lon]
        sea level pressure anomalies
    years : 1d array
        years in total
    year1 : integer
        min year
    year2 : integer
        max year
    monthind : 1d array
        indices for months to be calculated in seasonal mean
    eoftype : integer
        1,2
    pctype : integer
        1,2
    
    Returns
    -------
    eof : array
        empirical orthogonal function
    pc : array
        principal components
    """
    print('\n>>> Using calcSeasonalEOF function!')
    
    ### Slice years
    if np.isfinite(year1):
        if np.isfinite(year2):
            yearqq = np.where((years >= year1) & (years <= year2))
            anomslp = anomslp[yearqq,:,:,:].squeeze()
        else:
            print('Using entire time series for this EOF!')
    else:
        print('Using entire time series for this EOF!')
    print('Sliced time period for seasonal mean!')
    
    ### Average over months
#    anomslp = anomslp[:,monthind,:,:]
#    anomslp = np.nanmean(anomslp[:,:,:,:],axis=1)
    
    print('Sliced month period for seasonal mean!')
    
    anomslpq = anomslp
    
    pc = np.empty((anomslpq.shape[0],2,anomslpq.shape[1]))
    for i in range(anomslp.shape[1]):
        
        anomslp = anomslpq[:,i,:,:]

        ### Calculate EOF
        # Create an EOF solver to do the EOF analysis. Square-root of cosine of
        # latitude weights are applied before the computation of EOFs.
        coslat = np.cos(np.deg2rad(lats)).clip(0., 1.)
        wgts = np.sqrt(coslat)[..., np.newaxis]
        solver = Eof(anomslp, weights=wgts)
        
        # Retrieve the leading EOF, expressed as the covariance between the 
        # leading PC time series and the input SLP anomalies at each grid point.
        eof = solver.eofsAsCovariance(neofs=eoftype)
        pc[:,:,i] = solver.pcs(npcs=pctype, pcscaling=1)
        
        print('EOF and PC computed!')
    
    print('*Completed: EOF and PC Calculated!\n')
    
    return eof,pc
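A hedged usage sketch (all inputs are illustrative; note that the function reads a module-level lats array for its weights, and with the month-averaging lines commented out it computes one EOF per entry along the month axis):

import numpy as np

lats = np.linspace(-89.0, 89.0, 90)          # the function reads this module-level name
anomslp = np.random.randn(40, 12, 90, 180)   # (year, month, lat, lon) anomalies
years = np.arange(1979, 2019)
djf = np.array([11, 0, 1])                   # Dec, Jan, Feb indices along the month axis
eof, pc = calcSeasonalEOF(anomslp, years, 1980, 2010, djf,
                          eoftype=2, pctype=2)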
Example #22
way = np.cos(cos)
weightf = np.repeat(way[:, np.newaxis], len(lon_at),
                    axis=1)  # add weighting function (because of the latitude)
atemp_era5 = signal.detrend(
    hgt500_era5, axis=0)  # linearly detrend the 500 hPa geopotential height data
atemp_era5_pre = np.zeros((nt * ny, nlat, nlon))
for iy in np.arange(ny):
    atemp_era5_pre[iy * nt:iy * nt + nt] = atemp_era5[iy] * weightf[None, :, :]

### We did not use an n-day moving average as some other studies do, partly because the
###   original goal for us (Jiacheng & Zhuo) was to evaluate GEFS v12 reforecasts, where the
###   reforecast length is usually 16 days, which makes it impossible to apply an n-day moving
###   average or other time-filtering methods. 4 EOFs may already filter out some noisy signals.
### To be consistent with other researchers, one can add an n-day moving average, e.g. as sketched below.
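### A minimal sketch of that optional n-day running mean (hypothetical helper, not part of
### the original workflow): a centered boxcar average applied along the time axis.
def running_mean(x, n_days=5):
    kernel = np.ones(n_days) / n_days
    return np.apply_along_axis(
        lambda s: np.convolve(s, kernel, mode='same'), 0, x)
# atemp_era5_pre = running_mean(atemp_era5_pre, n_days=5)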
# EOF analysis
solver = Eof(atemp_era5_pre, center=True)
pcs = solver.pcs()
mid_eig = solver.eigenvalues()
mid_eofs = solver.eofs()
eofs = mid_eofs  # same patterns; avoid recomputing the EOFs

### Print explained variance when using 4 EOFs
#var_explained_era5= np.cumsum(mid_eig)/np.sum(np.sum(mid_eig))
#print(var_explained_era5[3])
#0.5316330300316366

reconstruction_era5 = solver.reconstructedField(
    noef)  #Using 4 leading EOFs to reconstruct hgt500 field

### The Kmeans method needs a 2-D data format: number of days x horizontal fields
atemp_era5_post = np.zeros((ny * nt, nlat * nlon))
for i in np.arange(ny * nt):
Example #23
cs.set_clim(-1, 1)
cb = plt.colorbar(cs)
plt.subplot(212)
cs = plt.imshow(covmaps[1], cmap=plt.cm.RdBu_r)
cs.set_clim(-1, 1)
cb = plt.colorbar(cs)
# -

# Then, we can recover the explained variance:

eofvar = solver.varianceFraction(neigs=neofs) * 100
eofvar

# Finally, we can obtain the principal components. To obtain normalized time series, the `pcscaling` argument must be set to 1.

pcs = solver.pcs(pcscaling=1, npcs=neofs).T
plt.figure()
plt.plot(pcs[0], label='pc1')
plt.plot(pcs[1], label='pc2')
leg = plt.legend()

# ## EOF computation (xarray mode)
#
# In order to have EOF as an `xarray` with all its features, the Eof method of the `eofs.xarray` submodule must be used.

from eofs.xarray import Eof

# Since it uses named labels, the `time_counter` dimension must first be renamed in `time`:

anoms = anoms.rename({'time_counter': 'time'})
solver = Eof(anoms, weights=weights)
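
# With the xarray interface the results keep named coordinates. A short, hedged
# sketch of typical next steps (the method names come from `eofs.xarray`; the
# mode counts are illustrative):

eofs_xr = solver.eofsAsCovariance(neofs=2)     # DataArray with mode and spatial coords
pcs_xr = solver.pcs(npcs=2, pcscaling=1)       # DataArray with time and mode coords
varfrac_xr = solver.varianceFraction(neigs=2)  # fraction of variance per mode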
Example #24
z5_diffnao = z5_diffn[:, :, :, lonq]

z5n_h = np.nanmean(z500_h[:, 91:, latq, :], axis=0)
z5nao_h = z5n_h[:, :, lonq]

### Calculate NAO
# Create an EOF solver to do the EOF analysis. Square-root of cosine of
# latitude weights are applied before the computation of EOFs.
coslat = np.cos(np.deg2rad(latnao)).clip(0., 1.)
wgts = np.sqrt(coslat)[..., np.newaxis]
solver = Eof(z5nao_h, weights=wgts)

# Retrieve the leading EOF, expressed as the covariance between the leading PC
# time series and the input SLP anomalies at each grid point.
eof1 = solver.eofsAsCovariance(neofs=1).squeeze()
pc1 = solver.pcs(npcs=1, pcscaling=1).squeeze()


### Calculate NAO index
def NAOIndex(anomz5, eofpattern, members):
    """
    Calculate NAO index by regressing Z500 onto the EOF1 pattern
    """
    print('\n>>> Using NAO Index function!')

    if members:
        nao = np.empty((anomz5.shape[0], anomz5.shape[1]))
        for i in range(anomz5.shape[0]):
            print('Regressing ensemble ---> %s!' % (i + 1))
            for j in range(anomz5.shape[1]):
                varx = np.ravel(anomz5[i, j, :, :])
Example #25
from eofs.standard import Eof

# ncep

coslat = np.cos(np.deg2rad(curl_ncep_clim.lat.values)).clip(0., 1.)
wgts = np.sqrt(coslat)[..., np.newaxis]
solver = Eof(curl_ncep_clim.values, weights=wgts)
var = solver.varianceFraction()

plt.bar(np.arange(0, len(var), 1), var * 100)
plt.show()

n = 1
eof_ncep = solver.eofs(neofs=n, eofscaling=2)
pc_ncep = solver.pcs(npcs=n, pcscaling=1)
vf_ncep = var[:n]

# cfsr

coslat = np.cos(np.deg2rad(curl_cfsr_clim.lat.values)).clip(0., 1.)
wgts = np.sqrt(coslat)[..., np.newaxis]
solver = Eof(curl_cfsr_clim.values, weights=wgts)
var = solver.varianceFraction()

plt.bar(np.arange(0, len(var), 1), var * 100)
plt.show()

n = 1
eof_cfsr = solver.eofs(neofs=n, eofscaling=2)
pc_cfsr = solver.pcs(npcs=n, pcscaling=1)
Example #26
#		filenames=filenames[:200]


		data = load_regional(filenames, ny, nx)
		data = np.ma.masked_values(data, 2.e+20)
		print("data loaded", data.shape)

		# Set up info
		plt.set_cmap('RdBu')
		neofs = 5
		nens = data.shape[0]
		nwanted = 57

		solver = Eof(data)
		print('set up EOF solver')
		pcs = solver.pcs(npcs=neofs, pcscaling=1)
		eofs = solver.eofs(neofs=neofs)
		varfrac = solver.varianceFraction(neigs=neofs)
		print('calculated EOFs')
		print('printing EOFs')
		for i in range(neofs):
			print('EOF', i)
			plt.clf()
			plot_region_pnw(eofs[i, :], lat_coord, lon_coord, 0, -1, 0, -1, 'EOF' + str(i), varfrac[i])
		print("plotting histograms of PCs")
		for i in range(3):
			plt.clf()
			plt.hist(pcs[:, i], 200, range=(-4, 4), density=True, alpha=0.4, label='pc' + str(i))
			plt.ylim([0, .6])
			plt.savefig(output_dir + '/histogram_pc' + str(i) + '.png')
		print("plotting mean and stdev of ensemble")
Example #27
    dim='time', skipna=True)
mes = datetime.datetime.strptime(lmonth[i], '%b').month
hgt_erai_smean = hgt_erai_seas_mean.sel(
    time=np.logical_and(hgt_erai_seas_mean['time.month'] == mes,
                        hgt_erai_seas_mean['time.year'] != 2002))
hgt_s4_smean = np.nanmean(hgt_s4.z.values[i:i + 3, :, :, :], axis=0)
# EOF analysis, observations
# Compute anomalies by removing the time-mean
z_mean = np.nanmean(hgt_erai_smean.values, axis=0)
z_anom = hgt_erai_smean.values - z_mean
# Create an EOF solver to do the EOF analysis. Square-root of cosine of
# latitude weights are applied before the computation of EOFs.
solver = Eof(z_anom)  #, weights=wgts)
eofs = solver.eofsAsCorrelation(neofs=5)
exp_var = solver.varianceFraction()
pcs = solver.pcs(npcs=5, pcscaling=1)
pc_erai[i, :, :] = pcs[:, 0:3]
title = 'Observed HGT 200hPa EOFs - ' + season[i]
filename = FIG_PATH + 'obs_eof_' + season[i] + '.png'
PlotEOF(eofs[0:3, :, :], lat_erai, lon_erai, title, filename)
filename = FIG_PATH + 'obs_scree_' + season[i] + '.png'
title = 'Variance Explained by Observed modes - ' + season[i]
PlotScree(exp_var, 36, title, filename)
# EOF analysis, model mean
# Compute anomalies by removing the time-mean
z_mean = np.nanmean(hgt_s4_smean, axis=0)
# compute the ensemble mean
hgt_s4m_smean = np.mean(np.reshape(hgt_s4_smean, [36, 51, 99, 512]),
                        axis=1)
z_anom = hgt_s4m_smean - z_mean
solver_s4 = Eof(z_anom)  #, weights=wgts)
Example #28
fig = plt.figure()
plt.bar(np.arange(6), solver_anom.eigenvalues())
fig.savefig(cartou + 'eigenvalues_temps_anom.pdf')

fig = plt.figure()
plt.bar(np.arange(6), solver.varianceFraction())
fig.savefig(cartou + 'varfrac_temps.pdf')

fig = plt.figure()
plt.bar(np.arange(6), solver_anom.varianceFraction())
fig.savefig(cartou + 'varfrac_temps_anom.pdf')

fig = plt.figure()
atm_mean = np.mean(temps, axis=0)
for i, pc in enumerate(solver.pcs()[:, 0]):
    plt.plot(atm_mean + pc * solver.eofs()[0] - temps[i, :], alts)
fig.savefig(cartou + 'residual_temps_firstpc.pdf')

fig = plt.figure()
atm_mean = np.mean(temps, axis=0)
for i, pc in enumerate(solver_anom.pcs()[:, 0]):
    plt.plot(atm_anom_mean + pc * solver_anom.eofs()[0] - temps_anom[i, :],
             alts)
fig.savefig(cartou + 'residual_temps_anom_firstpc.pdf')

# plt.figure()
# for i, pc in enumerate(solver.pcs()[:,:2]):
#     plt.plot(atm_mean+pc[0]*solver.eofs()[0]+pc[1]*solver.eofs()[1]-temps[i,:], alts)

# ok so, if keeping only first and second eof I'm able to explain quite a fraction of the variability
Example #29
#timearray.append(dt.fromtimestamp(i))
for i in timearray:
    print(i)
gridfile = '/users/asmit101/data/stuff/myngbay_grd.nc'
ncgrid = NetCDFFile(gridfile)
lat = ncgrid.variables['lat_rho']
lon = ncgrid.variables['lon_rho']
print(chlorophyll1.ndim)
print(chlorophyll1.dims)

surfchl = chlorophyll1[:, 14, :, :]
chl_mean = surfchl.mean(axis=0)
anomaly = surfchl - chl_mean
solver = Eof(anomaly)
eof1 = solver.eofsAsCorrelation(neofs=1)
pc1 = solver.pcs(npcs=1, pcscaling=1)
plt.pcolormesh(lon, lat, eof1[0], cmap=plt.cm.RdBu_r)
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.title('EOF1 expressed as Correlation')
cbar = plt.colorbar()
cbar.set_label('Correlation Coefficient', rotation=270)
plt.show()
plt.plot(timearray, pc1[:, 0])
plt.xlabel('Year')
plt.ylabel('Normalized Units')
plt.title('PC1 Time Series')
plt.show()
vF1 = solver.varianceFraction(neigs=6)
percentarray = vF1 * 100
array1 = [1, 2, 3, 4, 5, 6]
Example #30
                eof_data = np.vstack((eof_data, data[dt_index]))
            except ValueError:
                sys.exit("Exiting: timeseries have different lengths")

    if args.normalize:
        eof_data_std = np.std(eof_data, axis=1)
        eof_data = eof_data.T / np.std(eof_data, axis=1)
    else:
        #transpose so time is first dimension
        eof_data = eof_data.T

# Create an EOF solver to do the EOF analysis. No weights.
# The first dimension is assumed to be time by the program... not true if a timeseries is of interest,
print("Solving for n={} modes".format(args.eof_num))
solver = Eof(eof_data, center=False)
pcs = solver.pcs(npcs=args.eof_num)
eigval = solver.eigenvalues(neigs=args.eof_num)
varfrac = solver.varianceFraction(neigs=args.eof_num)
eofs = solver.eofs(neofs=args.eof_num)
eofcorr = solver.eofsAsCorrelation(neofs=args.eof_num)
eofcov = solver.eofsAsCovariance(neofs=args.eof_num)
"""---------------------------------Report-----------------------------------"""
### Print Select Results to file
outfile = args.outfile + '.txt'
print("EOF Results:", file=open(outfile, "w"))
print("------------", file=open(outfile, "a"))

print("File path: {}".format("/".join(filename.split('/')[:-1])),
      file=open(outfile, "a"))
for key, filename in (files.items()):
    print("Files input: {}".format(filename.split('/')[-1]),
Example #31
import cartopy.io.shapereader as shpreader
import xarray as xr
from eofs.standard import Eof
import numpy as np
f = xr.open_dataset('../data/pre.nc')
pre = np.array(f['pre'])
lat = f['lat']
lon = f['lon']
pre_lon = lon
pre_lat = lat
lat = np.array(lat)
coslat = np.cos(np.deg2rad(lat))
wgts = np.sqrt(coslat)[..., np.newaxis]
solver = Eof(pre, weights=wgts)
eof = solver.eofsAsCorrelation(neofs=3)
pc = solver.pcs(npcs=3, pcscaling=1)
var = solver.varianceFraction()
color1 = []
color2 = []
color3 = []
for i in range(1961, 2017):
    if pc[i - 1961, 0] >= 0:
        color1.append('red')
    elif pc[i - 1961, 0] < 0:
        color1.append('blue')
    if pc[i - 1961, 1] >= 0:
        color2.append('red')
    elif pc[i - 1961, 1] < 0:
        color2.append('blue')
    if pc[i - 1961, 2] >= 0:
        color3.append('red')
Example #32
def main(mFilepath, xFilepath, yFilepath, window, windowFlag=True):
    ## load in the data matrix as a numpy array
    m = np.loadtxt(mFilepath, dtype='float', delimiter=',', skiprows=1)
    # lon = np.loadtxt(xFilepath, dtype='float', delimiter=',', skiprows=1)
    # lat = np.loadtxt(yFilepath, dtype='float', delimiter=',', skiprows=1)
    # time =  np.arange('1958-01-01', '2014-09-22', dtype='datetime64')
    # years = range(1958, 2014)
    ## Create a list of dates spanning the study period
    base = dt.datetime(2014, 9, 21, 1, 1, 1, 1)
    dates = [base - dt.timedelta(days=x) for x in range(0, 20718)]
    date_list = [item for item in reversed(dates)]


    ## attempted to read in the raw data, but was struggling with
    ## the array dimensions
    # ncFiles = os.listdir(workspace)
    # slpList, lonList, latList, timeList = [], [], [], []
    # for fileIn in ncFiles:
    #     ncIn = Dataset(os.path.join(workspace, fileIn), 'r')
    #     slpList.append(ncIn.variables['slp'][:]/100)
    #     lonList.append(ncIn.variables['lon'][:])
    #     latList.append(ncIn.variables['lat'][:])
    #     timeList.append(ncIn.variables['time'][:])
    #     ncIn.close()

    # slp = np.array(slpList)
    # print(slp)
    # print(slp.shape)
    # # print(slp)
    # # print(np.shape(slp))

    ## create an EOF solver object and extract the first
    ## 4 EOFs and their associated PCs. Scaling can be
    ## applied if desired
    ## http://ajdawson.github.io/eofs/api/eofs.standard.html#eofs.standard.Eof
    solver = Eof(m)
    eofs = solver.eofs(neofs=4, eofscaling=0)
    pcs = solver.pcs(npcs=4, pcscaling=0)

    # lon, lat = np.meshgrid(lon, lat)

    ## plot the EOFs as nongeographic data for simplicity
    fig = plt.figure(figsize=(10, 10))
    for i in range(4):
        ax = fig.add_subplot(2, 2, i+1)
        lab = 'EOF' + str(i + 1)
        main =  'Unscaled ' + lab

        eofPlot = eofs[i,].reshape(17, 32)

        plt.imshow(eofPlot, cmap=plt.cm.RdBu_r)
        plt.title(main)
        cb = plt.colorbar(orientation='horizontal', cmap=plt.cm.RdBu_r)
        cb.set_label(lab, fontsize=12)

        ## Basemap failure below.  Something with the y cell size went wrong
        # bm = Basemap(projection='cyl', llcrnrlat=16.17951, urcrnrlat=68.48459,
        #              llcrnrlon=-176.0393, urcrnrlon=-98.07901, resolution='c')

        # # bm.contourf(x, y, eof1.squeeze(), clevs, cmap=plt.cm.RdBu_r)
        # bm.drawcoastlines()
        # bm.drawstates()
        # im = bm.pcolormesh(lon, lat, eofPlot, cmap=plt.cm.RdBu_r, latlon=True)
        # # bm.fillcontinents(color='coral', lake_color='aqua')
        # bm.drawparallels(np.arange(-90.,91.,15.))
        # bm.drawmeridians(np.arange(-180.,181.,30.))
        # # bm.drawmapboundary(fill_color='aqua')
        # cb = plt.colorbar(orientation='horizontal')
        # cb.set_label(lab, fontsize=12)
        # plt.title(main, fontsize=16)
        # plt.show()
    plt.show()
    ## Plot the PCs as a time series
    fig = plt.figure(figsize=(16, 16))
    for i in range(4):
        ylab = 'PC' + str(i+1)
        title = ylab + ' Time Series'

        pcPlot = pcs[:,i]
        if i==0:
            theAx = fig.add_subplot(4, 1, i+1)
            plt.setp(theAx.get_xticklabels(), visible=False)
            theAx.set_xlabel('')
        if i>0 and i<3:
            ax = fig.add_subplot(4, 1, i+1, sharex=theAx)
            plt.setp(ax.get_xticklabels(), visible=False)
        if i==3:
            ax = fig.add_subplot(4, 1, i+1, sharex=theAx)
            plt.xlabel('Date')

        plt.plot(date_list, pcPlot, color='b')
        if windowFlag:
            plt.plot(date_list, movingaverage(pcPlot, window), 
                     color='r', linestyle='-')
        plt.axhline(0, color='k')
        plt.title(title)
        plt.ylabel(ylab)
    plt.show()

    ## Subset the dates to the last year of the dataset
    short_date = [item for item in date_list if 
                  item >= dt.datetime(2013, 6, 17) 
                  and item < dt.datetime(2014, 6, 25)]
    indices = [date_list.index(item) for item in short_date]

    fig = plt.figure(figsize=(16, 16))
    ## Plot out the last year of the PCs to get a more detailed
    ## pattern for comparison to the R results
    for i in range(4):
        ylab = 'PC' + str(i+1)
        title = ylab + ' Time Series (1 year)'

        pcPlot = pcs[np.array(indices),i]
        if i==0:
            theAx = fig.add_subplot(4, 1, i+1)
            plt.setp(theAx.get_xticklabels(), visible=False)
            theAx.set_xlabel('')
        if i>0 and i<3:
            ax = fig.add_subplot(4, 1, i+1, sharex=theAx)
            plt.setp(ax.get_xticklabels(), visible=False)
        if i==3:
            ax = fig.add_subplot(4, 1, i+1, sharex=theAx)
            plt.xlabel('Date')

        plt.plot(short_date, pcPlot, color='b')
        
        if windowFlag:
            plt.plot(short_date, 
                     movingaverage(pcPlot, window), color='r')

        plt.axhline(0, color='k')
        plt.title(title)
        plt.ylabel(ylab)
    plt.show()

    ## Subset the dates to the last decade of the dataset
    decade = [item for item in date_list if 
              item >= dt.datetime(2004, 6, 17) 
                  and item < dt.datetime(2014, 6, 17)]
    decadeIndices = [date_list.index(item) for item in decade]

    fig = plt.figure(figsize=(16, 16))
    ## Plot out the last decade of the PCs to get a more detailed
    ## pattern for comparison to the R results
    for i in range(4):
        ylab = 'PC' + str(i+1)
        title = ylab + ' Time Series (1 decade)'

        pcPlot = pcs[np.array(decadeIndices),i]
        if i==0:
            theAx = fig.add_subplot(4, 1, i+1)
            plt.setp(theAx.get_xticklabels(), visible=False)
            theAx.set_xlabel('')
        if i>0 and i<3:
            ax = fig.add_subplot(4, 1, i+1, sharex=theAx)
            plt.setp(ax.get_xticklabels(), visible=False)
        if i==3:
            ax = fig.add_subplot(4, 1, i+1, sharex=theAx)
            plt.xlabel('Date')

        plt.plot(decade, pcPlot, color='b')
        if windowFlag:
            plt.plot(decade, 
                     movingaverage(pcPlot, window), color='r')
        plt.axhline(0, color='k')
        plt.title(title)
        plt.ylabel(ylab)
    plt.show()
Example #33
    cnc = camgoda(cfull_path)
    tnc = camgoda(tfull_path)
    is3d, var, vname = cnc.ExtractData(variable, box)
    is3d, var, vname = tnc.ExtractData(variable, box)
    if n == 0:
        nlats, nlons = cnc.data.shape
        boxlat = cnc.boxlat
        boxlon = cnc.boxlon
        d = np.zeros(shape=(len(dates), nlats * nlons))
    d[n, :] = np.ndarray.flatten(tnc.data - cnc.data)

# Compute the amplitude timeseries and EOF spatial distributions of the data array
print "Computing the EOF..."
EOF = Eof(d, center=removeMeans)
eof = EOF.eofs(neofs=num_eofs)
pca = EOF.pcs(npcs=num_eofs, pcscaling=1)
varfrac = EOF.varianceFraction()
print "Finished!"

# Reshape F into a spatial grid
eof_grid = np.reshape(eof, (eof.shape[0], nlats, nlons))

# Make the maps
bmlon, bmlat = np.meshgrid(boxlon, boxlat)
southern_lat = boxlat[0]
northern_lat = boxlat[-1]
left_lon = boxlon[0]
right_lon = boxlon[-1]
if 0 in boxlon[1:-2]:  # if we cross the gml
    left_lon = boxlon[0] - 360
Example #34
def calculate_correlations_and_pvalues(var_pairs, label_to_vname_to_season_to_yearlydata: dict, season_to_months: dict,
                                       region_of_interest_mask, lakes_mask=None, lats=None) -> dict:
    """

    :param var_pairs:
    :param label_to_vname_to_season_to_yearlydata:
    :param lats: needed for weighting of the eof solver
    :return: {(vname1, vname2): {label: {season: [corr, pvalue]}}}
    """
    res = {}
    for pair in var_pairs:
        pair = tuple(pair)

        res[pair] = {}

        for label in label_to_vname_to_season_to_yearlydata:

            res[pair][label] = {}
            for season in season_to_months:

                years_sorted = sorted(label_to_vname_to_season_to_yearlydata[label][pair[0]][season])

                v1_dict, v2_dict = [label_to_vname_to_season_to_yearlydata[label][pair[vi]][season] for vi in range(2)]
                v1 = np.array([v1_dict[y] for y in years_sorted])
                v2 = np.array([v2_dict[y] for y in years_sorted])

                r = np.zeros(v1.shape[1:]).flatten()
                p = np.ones_like(r).flatten()

                v1 = v1.reshape((v1.shape[0], -1))
                v2 = v2.reshape((v2.shape[0], -1))

                # for hles and ice fraction get the eof of the ice and correlate
                if pair == ("hles_snow", "lake_ice_fraction"):
                    # assume that v2 is the lake_ice_fraction
                    v_lake_ice = v2

                    positions_hles_region = np.where(region_of_interest_mask.flatten())[0]
                    positions_lakes = np.where(lakes_mask.flatten())[0]

                    v_lake_ice = v_lake_ice[:, positions_lakes]
                    # calculate anomalies
                    v_lake_ice = v_lake_ice - v_lake_ice.mean(axis=0)

                    weights = np.cos(np.deg2rad(lats.flatten()[positions_lakes])) ** 0.5

                    solver = Eof(v_lake_ice, weights=weights[..., np.newaxis])
                    print(label, solver.varianceFraction(neigs=10))


                    # use the module of the PC1 to make sure it has physical meaning
                    pc1_ice = solver.pcs(npcs=1)[:, 0]

                    # debug: plot eof
                    eofs = solver.eofs(neofs=1)

                    eof_2d = np.zeros_like(lats).flatten()
                    eof_2d[positions_lakes] = eofs[:, 0] * pc1_ice
                    eof_2d = eof_2d.reshape(lats.shape)

                    plt.figure()
                    im = plt.pcolormesh(eof_2d.T)
                    plt.colorbar(im)
                    plt.show()

                    if True:
                        raise Exception


                    # print(positions)
                    for i in positions_hles_region:
                        r[i], p[i] = pearsonr(v1[:, i], pc1_ice)

                else:

                    positions = np.where(region_of_interest_mask.flatten())

                    # print(positions)
                    for i in positions[0]:
                        r[i], p[i] = pearsonr(v1[:, i], v2[:, i])

                r.shape = region_of_interest_mask.shape
                p.shape = region_of_interest_mask.shape

                r = np.ma.masked_where(~region_of_interest_mask, r)
                p = np.ma.masked_where(~region_of_interest_mask, p)

                res[pair][label][season] = [r, p]

    return res
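A hedged sketch of consuming the returned structure (the call arguments are illustrative; the nesting mirrors the docstring):

# res = calculate_correlations_and_pvalues(var_pairs, data, season_to_months,
#                                          region_mask, lakes_mask=lakes, lats=lats)
for (vname1, vname2), label_dict in res.items():
    for label, season_dict in label_dict.items():
        for season, (corr, pvalue) in season_dict.items():
            print(vname1, vname2, label, season, corr.mean(), pvalue.min())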
Example #35
a = Dataset(filename1, mode='r')
b = Dataset(filename2, mode='r')
dataset1 = xr.open_dataset(xr.backends.NetCDF4DataStore(a))
dataset2 = xr.open_dataset(xr.backends.NetCDF4DataStore(b))
sinData = dataset1['data'].T
sinData = (sinData - sinData.mean(axis=0)) / sinData.std(axis=0)
sinData = sinData.values
bullseyeData = dataset2['data']

#%% EOF analysis
solver = Eof(sinData)
eigenvalues = solver.eigenvalues()  # Get eigenvalues
EOFs = solver.eofs(eofscaling=0)  # Get EOFs
EOFs_reg = solver.eofsAsCorrelation(
)  # Get EOFs as correlation b/w PCs &  orig data
PCs = solver.pcs(pcscaling=1)  # Get PCs

# Get variance explained and # of PCs
VarExplain = np.round(solver.varianceFraction() * 100, 1)
numPCs2Keep = cumSUM(VarExplain, 90)

# Calculate EOFs
EOF1 = EOFs[0, :] * np.sqrt(eigenvalues[0])  # Get EOF 1 & scale it
EOF2 = EOFs[1, :] * np.sqrt(eigenvalues[1])  # Get EOF 2 & scale it
EOF1_reg = EOFs_reg[0, :]
EOF2_reg = EOFs_reg[1, :]
stdPC1 = PCs[:, 0]
stdPC2 = PCs[:, 1]

# Alt method of getting EOF 1 by regressing PC on data
#EOF1_reg = np.expand_dims(stdPC1,0) @ sinData
Example #36
    '/home/bock/Documents/tesis/datos/ncep2_atlsur_2009_2015.nc')
clim_nc = dat['curl'].groupby('time.month').mean('time').sel(
    lat=slice(-20, -40), lon=slice(-64, -22))

coslat = np.cos(np.deg2rad(clim_nc.lat.values)).clip(0., 1.)
wgts = np.sqrt(coslat)[..., np.newaxis]
solver = Eof(clim_nc.values, weights=wgts)
var = solver.varianceFraction()
plt.figure(1)
plt.bar(np.arange(0, len(var), 1), var * 100)
plt.show()
plt.close()
n = input('How many PCs to extract: ')
n = int(n)
eof = solver.eofs(neofs=n, eofscaling=2)
pc = solver.pcs(npcs=n, pcscaling=1)
vf = var[:n]

fig, ax = plotterchico(clim_nc.lat, clim_nc.lon, eof[0] * 1e7, cm.GMT_no_green,
                       np.arange(-0.5, 0.51, .1), '', '10$^{-7}$ Pa/m')
plt.savefig('/home/bock/Documents/tesis/resultados/figs/eof1_ncep.png',
            bbox_inches='tight')
plt.show()
fig, ax = plotterchico(clim_nc.lat, clim_nc.lon, eof[1] * 1e7, cm.GMT_no_green,
                       np.arange(-0.5, 0.51, .1), '', '10$^{-7}$ Pa/m')
plt.savefig('/home/bock/Documents/tesis/resultados/figs/eof2_ncep.png',
            bbox_inches='tight')
plt.show()

dat1 = xarray.open_dataset(
    '/home/bock/Documents/tesis/datos/cfsr_atlsur_2009_2015.nc')