# Example 1
def parcorr_z(field: xr.DataArray,
              ts: np.ndarray,
              z: pd.DataFrame,
              lag_z: int = 0):
    '''
    Partial correlation between each gridcell of field and ts, regressing out
    the influence of the 1-d timeseries z.

    If lag_z == 0 the dates of z match the dates of field. Note that
    lag_z >= 1 probably only makes sense for subseasonal data (more than one
    value per year).

    Parameters
    ----------
    field : xr.DataArray
        (time, lat, lon) field.
    ts : np.ndarray
        Target timeseries.
    z : pd.DataFrame
        1-d timeseries to condition on.
    lag_z : int, optional
        Lag applied to z. The default is 0.

    Returns
    -------
    corr_vals : np.ndarray
    pvals : np.ndarray
    '''
    # Grab the dates before NaN-checking; z is aligned on these below.
    dates = pd.to_datetime(field.time.values)
    # If more than one year is filled with NaNs -> no corr value calculated.
    field, ts = check_NaNs(field, ts)

    n_gc = field.shape[1]
    corr_vals = np.zeros(n_gc)
    pvals = np.zeros(n_gc)

    # Gridcells containing any NaN are skipped and restored to NaN at the end.
    gc_has_nan = np.array([np.isnan(field[:, i]).any() for i in range(n_gc)])
    valid_gc = np.flatnonzero(~gc_has_nan)

    # Shape (samples, 1); drop the first lag_z datapoints of the target.
    y = np.expand_dims(ts[lag_z:], axis=1)

    # Align z on the field dates and force shape (samples, dimension).
    if len(z.values.squeeze().shape) == 1:
        z = np.expand_dims(z.loc[dates].values.squeeze(), axis=1)
    else:
        z = z.loc[dates].values.squeeze()
    if lag_z >= 1:
        # Last values of z are 'removed' so z leads y by lag_z steps.
        z = z[:-lag_z]

    for i in valid_gc:
        cond_ind_test = ParCorr()
        gc_series = np.expand_dims(field[lag_z:, i], axis=1)
        val, pval = cond_ind_test.run_test_raw(gc_series, y, z)
        corr_vals[i] = val
        pvals[i] = pval

    # Restore original NaNs.
    corr_vals[gc_has_nan] = np.nan
    return corr_vals, pvals
# Example 2
def parcorr_map_time(field: xr.DataArray, ts: np.ndarray, lag_y=0, lag_x=0):
    '''
    Partial correlation map between field and ts, conditioning on lagged
    values of ts (lag_y) and/or lagged values of each gridcell (lag_x).

    Only works for subseasonal data (more than one datapoint per year).
    At least one lag in lag_y or lag_x must be >= 1.

    Parameters
    ----------
    field : xr.DataArray
        (time, lat, lon) field.
    ts : np.ndarray
        Target timeseries.
    lag_y : int or list of int, optional
        Lag(s) of ts used as conditioning variables. 0 means none.
        The default is 0.
    lag_x : int or list of int, optional
        Lag(s) of each gridcell timeseries used as conditioning variables.
        0 means none. The default is 0.

    Returns
    -------
    corr_vals : np.ndarray
    pvals : np.ndarray

    Raises
    ------
    ValueError
        If neither lag_y nor lag_x contains a lag >= 1.
    '''
    if isinstance(lag_y, int):
        lag_y = [lag_y]
    if isinstance(lag_x, int):
        lag_x = [lag_x]

    max_lag = max(max(lag_y), max(lag_x))
    # Explicit raise instead of assert: asserts are stripped under python -O,
    # which would leave the conditioning set undefined further down.
    if max_lag < 1:
        raise ValueError('lag_x or lag_y must be >= 1')

    # If more than one year is filled with NaNs -> no corr value calculated.
    field, ts = check_NaNs(field, ts)
    n_gc = field.shape[1]
    corr_vals = np.zeros(n_gc)
    pvals = np.zeros(n_gc)

    # Gridcells containing any NaN are skipped and restored to NaN at the end.
    gc_has_nan = np.array([np.isnan(field[:, i]).any() for i in range(n_gc)])
    valid_gc = np.flatnonzero(~gc_has_nan)

    # Lagged copies of the target, shape (samples, n_lags); None when unused.
    zy = None
    if max(lag_y) > 0:
        zy = np.concatenate(
            [np.expand_dims(ts[max_lag - l:-l], axis=1) for l in lag_y
             if l != 0], axis=1)
    use_zx = max(lag_x) > 0

    y = np.expand_dims(ts[max_lag:], axis=1)
    for i in valid_gc:
        cond_ind_test = ParCorr()
        conditions = []
        if zy is not None:
            conditions.append(zy)
        if use_zx:
            # Lagged copies of this gridcell's own timeseries.
            zx = np.concatenate(
                [np.expand_dims(field[max_lag - l:-l, i], axis=1)
                 for l in lag_x if l != 0], axis=1)
            conditions.append(zx)
        z = np.concatenate(conditions, axis=1)
        field_i = np.expand_dims(field[max_lag:, i], axis=1)
        val, pval = cond_ind_test.run_test_raw(field_i, y, z)
        corr_vals[i] = val
        pvals[i] = pval

    # Restore original NaNs.
    # NOTE(review): like the original, only corr_vals is NaN-restored;
    # pvals keeps 0.0 for NaN gridcells — confirm this is intended.
    corr_vals[gc_has_nan] = np.nan
    return corr_vals, pvals