예제 #1
0
    def PDO_single_split(s, ds, df_splits, PDO_patterns):
        """Project per-split PDO patterns onto the train and test dates of
        split *s* and return the combined timeseries.

        NOTE(review): this function reads ``splits`` (progress fraction) and
        ``dates`` (full time axis of ``ds``) from an enclosing scope -- they
        are neither parameters nor locals. Confirm this def is nested where
        both names are defined, otherwise it raises NameError.

        Parameters
        ----------
        s : split index into ``df_splits`` / ``PDO_patterns``.
        ds : field with a ``time`` dimension (xarray) -- presumably SST;
            verify against the caller.
        df_splits : DataFrame indexed by split with a boolean 'TrainIsTrue'.
        PDO_patterns : precomputed per-split patterns used for projection.

        Returns
        -------
        (df, PDO_pattern) : train+test timeseries (column '0_901_PDO',
            sorted by date) and the pattern freshly fitted on the training
            years. NOTE(review): the projection uses ``PDO_patterns[s]``,
            not the freshly fitted ``PDO_pattern`` -- confirm intentional.
        """
        progress = 100 * (s + 1) / splits.size
        # dates flagged True in 'TrainIsTrue' belong to the training part
        dates_train = df_splits.loc[s]['TrainIsTrue'][df_splits.loc[s]
                                                      ['TrainIsTrue']].index
        # the PDO is fitted on *all* dates within the training years, not
        # only the (possibly seasonal) training dates themselves
        train_yrs = np.unique(dates_train.year)
        dates_all_train = pd.to_datetime(
            [d for d in dates if d.year in train_yrs])
        ###        dates_train_yrs = ###
        dates_test = df_splits.loc[s]['TrainIsTrue'][
            ~df_splits.loc[s]['TrainIsTrue']].index
        n = dates_train.size
        r = int(100 * n / df_splits.loc[s].index.size)
        print(
            f"\rProgress PDO traintest set {progress}%, trainsize=({n}dp, {r}%)",
            end="")

        # solver and adjust_sign are returned by get_PDO but unused here
        PDO_pattern, solver, adjust_sign = get_PDO(
            ds.sel(time=dates_all_train))
        data_train = find_precursors.calc_spatcov(ds.sel(time=dates_train),
                                                  PDO_patterns[s])
        data_test = find_precursors.calc_spatcov(ds.sel(time=dates_test),
                                                 PDO_patterns[s])

        df_test = pd.DataFrame(data=data_test.values,
                               index=dates_test,
                               columns=['0_901_PDO'])
        df_train = pd.DataFrame(data=data_train.values,
                                index=dates_train,
                                columns=['0_901_PDO'])

        df = pd.concat([df_test, df_train]).sort_index()
        return (df, PDO_pattern)
예제 #2
0
def single_split_calc_spatcov(precur, precur_arr: np.ndarray, corr: np.ndarray,
                              labels: np.ndarray, a_wghts: np.ndarray,
                              lags: np.ndarray, use_sign_pattern: bool):
    """Compute spatial-covariance timeseries for one train/test split.

    For each lag, the correlation map masked by the precursor-region labels
    is used as a spatial pattern (optionally reduced to its sign); the
    precursor field is projected onto it via find_precursors.calc_spatcov.

    Parameters
    ----------
    precur : precursor object; ``.period_means_array`` and ``.precur_arr``
        are read when lags encode aggregation periods, ``.name`` labels the
        output columns.
    precur_arr : precursor field with time on axis 0.
    labels : region labels per lag; NaN marks cells outside any region.
    a_wghts : area weights forwarded to calc_spatcov.
    use_sign_pattern : project on sign(pattern) instead of pattern.

    Returns
    -------
    ts_list : object array of length ``lags.size``; each entry an
        (n_time, 1) array, all-NaN when no regions exist for that lag.
    track_names : one column name per lag ('<lag>..0..<name>_sp').
    """
    ts_list = np.zeros(lags.size, dtype=object)
    track_names = []
    for il, lag in enumerate(lags):

        # if the lag axis encodes aggregation periods, select the field
        # pre-aggregated for this lag instead of the raw array
        if precur.period_means_array:
            precur_arr = precur.precur_arr.sel(lag=il)

        # copy: corr[il] may be a read-only view (ValueError on assignment)
        pattern = np.copy(corr[il])
        pattern[np.isnan(labels[il])] = np.nan
        if use_sign_pattern:
            pattern = np.sign(pattern)
        if np.isnan(pattern).all():
            # no regions of this variable in this split: emit a NaN column
            ts_list[il] = np.full((precur_arr.shape[0], 1), np.nan)
        else:
            xrts = find_precursors.calc_spatcov(precur_arr, pattern,
                                                area_wght=a_wghts)
            ts_list[il] = xrts[:, None]
        track_names.append(f'{lag}..0..{precur.name}' + '_sp')
    return ts_list, track_names
예제 #3
0
File: class_EOF.py  Project: VU-IVM/RGCPD
    def get_ts(self, tfreq_ts=1, df_splits=None):
        """Project the stored EOF patterns onto the (re-aggregated) field,
        yielding one timeseries per split and per EOF.

        Parameters
        ----------
        tfreq_ts : temporal aggregation (days per bin) used when reloading
            the field from ``self.filepath``.
        df_splits : train/test split DataFrame; defaults to
            ``self.df_splits``.

        Side effects
        ------------
        Sets ``self.df``: the per-split DataFrames concatenated with the
        split index as outer key.
        """
        if df_splits is None:
            # fall back to the splits stored on the instance
            df_splits = self.df_splits
        splits = self.eofs['split'].values
        neofs = self.eofs['eof'].values
        ds = functions_pp.import_ds_timemeanbins(self.filepath,
                                                 tfreq=tfreq_ts,
                                                 selbox=self.selbox,
                                                 start_end_date=self.start_end_date,
                                                 start_end_year=self.start_end_year)
        df_data_s = np.zeros((splits.size), dtype=object)
        dates = pd.to_datetime(ds['time'].values)
        for s in splits:

            dfs = pd.DataFrame(columns=neofs, index=dates)
            for i, e in enumerate(neofs):

                pattern = self.eofs.sel(split=s, eof=e)
                data = find_precursors.calc_spatcov(ds, pattern)
                dfs[e] = pd.Series(data.values,
                                   index=dates)
                if i == neofs.size - 1:
                    # after the last EOF, attach the train/test bookkeeping
                    dfs = dfs.merge(df_splits.loc[s],
                                    left_index=True, right_index=True)
            df_data_s[s] = dfs
        self.df = pd.concat(list(df_data_s), keys=range(splits.size))
예제 #4
0
def PDO_single_split(s, ds_monthly, ds, df_splits):
    """Compute the PDO timeseries for one train/test split.

    The PDO pattern is fitted (via get_PDO) on the monthly field restricted
    to the training years, then projected onto the train dates and -- when
    there is more than one split -- the test dates of the original time axis.

    Returns
    -------
    (df, PDO_pattern) : projected timeseries (column 'PDO', sorted by date)
        and the fitted pattern.
    """
    splits = df_splits.index.levels[0]
    train_is_true = df_splits.loc[s]['TrainIsTrue']
    dates_train_origtime = train_is_true[train_is_true].index
    dates_test_origtime = train_is_true[~train_is_true].index

    # progress report for this split
    progress = 100 * (s + 1) / splits.size
    n = dates_train_origtime.size
    r = int(100 * n / df_splits.loc[s].index.size)
    print(f"\rProgress PDO traintest set {progress}%, trainsize=({n}dp, {r}%)",
          end="")

    # translate the training years from the original time axis to monthly
    train_yrs = np.unique(dates_train_origtime.year)
    dates_monthly = pd.to_datetime(ds_monthly.time.values)
    dates_all_train = pd.to_datetime(
        [d for d in dates_monthly if d.year in train_yrs])

    # solver and adjust_sign are returned by get_PDO but unused here
    PDO_pattern, solver, adjust_sign = get_PDO(
        ds_monthly.sel(time=dates_all_train))

    def _project(dates):
        # project the fitted pattern onto the field at the given dates
        spatcov = find_precursors.calc_spatcov(
            ds.sel(time=dates).load(), PDO_pattern)
        return pd.DataFrame(data=spatcov.values, index=dates, columns=['PDO'])

    df_train = _project(dates_train_origtime)
    if splits.size > 1:
        df = pd.concat([_project(dates_test_origtime), df_train]).sort_index()
    else:
        df = df_train
    return (df, PDO_pattern)
예제 #5
0
def loop_get_spatcov(precur,
                     precur_aggr=None,
                     kwrgs_load: dict = None,
                     force_reload: bool = False,
                     lags: list = None):
    """Project the precursor field onto its correlation (or sign) pattern.

    For every train/test split and lag, the correlation map masked by the
    precursor-region labels is used as a spatial pattern; the precursor
    array is projected onto it with find_precursors.calc_spatcov.

    Parameters
    ----------
    precur : precursor object; reads corr_xr, prec_labels, precur_arr,
        lags, name, use_sign_pattern; may write tfreq and area_grid.
    precur_aggr : int, optional
        Temporal aggregation (tfreq, days) used to reload the precursor
        array; None keeps the array the correlation map was made with.
    kwrgs_load : dict, optional
        Keyword args for precur.load_and_aggregate_precur; required when
        precur_aggr is given or force_reload is True.
    force_reload : bool
        Reload the precursor array even when precur_aggr is None.
    lags : list, optional
        Subset of lags to process; None processes all lags in prec_labels.

    Returns
    -------
    np.ndarray (dtype=object) with one pd.DataFrame per split, columns
    named '<lag>..0..<name>_sp'.
    """
    use_sign_pattern = precur.use_sign_pattern
    corr_xr = precur.corr_xr
    prec_labels = precur.prec_labels
    splits = corr_xr.split
    if lags is not None:
        lags = np.array(lags)  # ensure lags is an np.ndarray for .sel
        corr_xr = corr_xr.sel(lag=lags).copy()
        prec_labels = prec_labels.sel(lag=lags).copy()
    else:
        lags = prec_labels.lag.values

    # Infer the temporal frequency of the stored precursor array
    # (informative only; tfreq == 365 means one value per year, where the
    # aggregation period was defined by the lag).
    dates = pd.to_datetime(precur.precur_arr.time.values)
    oneyr = functions_pp.get_oneyr(dates)
    if oneyr.size == 1:  # single value per year precursor
        tfreq = 365
    else:
        tfreq = (oneyr[1] - oneyr[0]).days

    if precur_aggr is None and not force_reload:
        # use the precursor array with the temporal aggregation that was
        # used to create the correlation map; no reload needed
        precur_arr = precur.precur_arr
    else:
        if precur_aggr is not None:
            precur.tfreq = precur_aggr
        precur.load_and_aggregate_precur(kwrgs_load.copy())
        precur_arr = precur.precur_arr

    precur.area_grid = find_precursors.get_area(precur_arr)
    if precur_arr.shape[-2:] != corr_xr.shape[-2:]:
        print('shape loaded precur_arr != corr map, matching coords')
        corr_xr, prec_labels = functions_pp.match_coords_xarrays(
            precur_arr, *[corr_xr, prec_labels])

    ts_sp = np.zeros((splits.size), dtype=object)
    for s in splits:
        ts_list = np.zeros((lags.size), dtype=object)
        track_names = []
        for il, lag in enumerate(lags):

            # if this lag represents an aggregation period, select the
            # field pre-aggregated for it
            if isinstance(precur.lags[il], np.ndarray) and precur_aggr is None:
                precur_arr = precur.precur_arr.sel(lag=il)

            corr_vals = corr_xr.sel(split=s).isel(lag=il)
            mask = prec_labels.sel(split=s).isel(lag=il)
            pattern = corr_vals.where(~np.isnan(mask))
            if use_sign_pattern:
                pattern = np.sign(pattern)
            if np.isnan(pattern.values).all():
                # no regions of this variable in this split: NaN column
                ts_list[il] = np.full((precur_arr.time.size, 1), np.nan)
            else:
                xrts = find_precursors.calc_spatcov(precur_arr, pattern)
                ts_list[il] = xrts.values[:, None]
            track_names.append(f'{lag}..0..{precur.name}' + '_sp')

        # concatenate the timeseries of all lags into a (time, lags) array
        tsCorr = np.concatenate(tuple(ts_list), axis=1)

        dates = pd.to_datetime(precur_arr.time.values)
        ts_sp[s] = pd.DataFrame(tsCorr, index=dates, columns=track_names)
    return ts_sp
예제 #6
0
        dates_RV = core_pp.get_subdates(pd.to_datetime(rg.fulltso.time.values),
                                       start_end_date=rg.start_end_TVdate)
        RV_ts = rg.fulltso.sel(time=dates_RV)
        ds_v300 = core_pp.import_ds_lazy(rg.list_precur_pp[1][1])
        dslocal = core_pp.get_selbox(ds_v300, selbox=selbox)



        datesRW = core_pp.get_subdates(pd.to_datetime(dslocal.time.values),
                                       start_end_date=rg.start_end_TVdate)
        datesRW = datesRW + pd.Timedelta(f'{lag}d')
        dslocal = dslocal.sel(time=datesRW)

        wv6local = core_pp.get_selbox(xarray.sel(lag=5), selbox=selbox)
        patternlocal = wv6local.mean(dim='lag')
        ts = find_precursors.calc_spatcov(dslocal, patternlocal)
        ts_15, d = functions_pp.time_mean_bins(ts, tfreq, start_end_date=start_end_TVdate,
                                                   closed_on_date=start_end_TVdate[-1])
        RV_15, d = functions_pp.time_mean_bins(RV_ts, tfreq, start_end_date=start_end_TVdate,
                                                   closed_on_date=start_end_TVdate[-1])
        corr_value = np.corrcoef(ts_15.values.squeeze(), RV_15.values.squeeze())[0][1]
        print('corr: {:.2f}'.format(corr_value))
        values.append(corr_value)
    plt.plot(range(-9,10), values[1:])
    # df_wv6 = ts_15.to_dataframe(name='wv6p2')
#%%
# Inspect the composite SST anomaly during the event years.
# NOTE(review): depends on notebook-scope names `rg` (RGCPD-like object) and
# `event_dates` -- run the preceding cells first.
sst = rg.list_for_MI[2]

# select all dates falling in the event years, then plot the time-mean field
dates_years = functions_pp.get_oneyr(sst.df_splits.loc[0].index, *event_dates.year)
sst.precur_arr.sel(time=dates_years).mean(dim='time').plot(vmin=-.3, vmax=.3,
                                                           cmap=plt.cm.RdBu_r)
예제 #7
0
def PDO_temp(filename, ex, df_splits=None):
    #%%
    '''
    The PDO is calculated from all data points in the training years.
    Subsequently, the PDO pattern is projected on sst.sel(time=dates_train)
    to enable retrieving the PDO timeseries on a subset of the year.
    It is similarly also projected on the dates_test.
    From https://climatedataguide.ucar.edu/climate-data/pacific-decadal-oscillation-pdo-definition-and-indices

    Returns a DataFrame of per-split PDO timeseries (column '0_901_PDO'),
    concatenated with the split index as outer key.
    '''

    # fall back to randomly generated train/test years when no splits given
    if df_splits is None:
        RV = ex[ex['RV_name']]
        df_splits, ex = functions_pp.rand_traintest_years(RV, ex)

    # North Pacific domain used for the PDO EOF
    kwrgs_pp = {
        'selbox': {
            'la_min': 20,  # select domain in degrees east
            'la_max': 65,
            'lo_min': 115,
            'lo_max': 250
        },
        'format_lon': 'only_east'
    }
    ds = core_pp.import_ds_lazy(filename, **kwrgs_pp)

    # aggregate to the experiment's temporal frequency if needed
    to_freq = ex['tfreq']
    if to_freq != 1:
        ds, dates = functions_pp.time_mean_bins(ds,
                                                ex,
                                                to_freq=to_freq,
                                                seldays='all')
        ds['time'] = dates

    dates = pd.to_datetime(ds.time.values)

    splits = df_splits.index.levels[0]
    # one (latitude, longitude) PDO pattern per train/test split
    data = np.zeros((splits.size, ds.latitude.size, ds.longitude.size))
    PDO_patterns = xr.DataArray(
        data,
        coords=[splits, ds.latitude.values, ds.longitude.values],
        dims=['split', 'latitude', 'longitude'])
    list_splits = []
    for s in splits:

        progress = 100 * (s + 1) / splits.size
        # dates flagged True in 'TrainIsTrue' belong to the training part
        dates_train = df_splits.loc[s]['TrainIsTrue'][df_splits.loc[s]
                                                      ['TrainIsTrue']].index
        # the PDO is fitted on *all* dates within the training years, not
        # only the (possibly seasonal) training dates themselves
        train_yrs = np.unique(dates_train.year)
        dates_all_train = pd.to_datetime(
            [d for d in dates if d.year in train_yrs])
        dates_test = df_splits.loc[s]['TrainIsTrue'][
            ~df_splits.loc[s]['TrainIsTrue']].index
        n = dates_train.size
        r = int(100 * n / df_splits.loc[s].index.size)
        print(
            f"\rProgress PDO traintest set {progress}%, trainsize=({n}dp, {r}%)",
            end="")

        # solver and adjust_sign are returned by get_PDO but unused here
        PDO_patterns[s], solver, adjust_sign = get_PDO(
            ds.sel(time=dates_all_train))

        # fill gaps along longitude before projecting
        PDO_patterns[s] = PDO_patterns[s].interpolate_na(dim='longitude')
        data_train = find_precursors.calc_spatcov(ds.sel(time=dates_train),
                                                  PDO_patterns[s])
        data_test = find_precursors.calc_spatcov(ds.sel(time=dates_test),
                                                 PDO_patterns[s])

        df_test = pd.DataFrame(data=data_test.values,
                               index=dates_test,
                               columns=['0_901_PDO'])
        df_train = pd.DataFrame(data=data_train.values,
                                index=dates_train,
                                columns=['0_901_PDO'])

        df = pd.concat([df_test, df_train]).sort_index()
        list_splits.append(df)

    df_PDO = pd.concat(list_splits, axis=0, keys=splits)
    #%%
    return df_PDO