예제 #1
0
def regress_3dim_data_onto_eofs(data: object,
                                eofdata: eof.EOFDataForAllDOYs) -> pc.PCData:
    """
    Finds time-dependent coefficients w.r.t. the DOY-dependent EOF basis for time-dependent spatially resolved data.

    I.e. it finds the PCs for temporally resolved OLR data. The function can also be used for other datasets,
    as long as those datasets have the same structure as the class :class:`mjoindices.olr_handling.OLRData`
    (this function reads the attributes ``lat``, ``long`` and ``time`` and calls ``get_olr_for_date``).

    :param data: The data, for which the coefficients are sought. Should be an object of class
        :class:`mjoindices.olr_handling.OLRData` or of similar structure.
    :param eofdata: The DOY-dependent pairs of EOFs, like computed by, e.g., :func:`calc_eofs_from_olr`

    :return: The time-dependent PCs as :class:`mjoindices.principal_components.PCData`

    :raises ValueError: If the latitude or longitude grid of ``data`` differs from the one of ``eofdata``.
    """
    # np.array_equal compares the shapes as well as the values. An elementwise "==" would broadcast,
    # so grids of different length could either slip through or fail with an unrelated broadcasting error.
    if not np.array_equal(data.lat, eofdata.lat):
        raise ValueError("Latitude grid of EOFs and OLR is not equal.")
    if not np.array_equal(data.long, eofdata.long):
        raise ValueError("Longitude grid of EOFs and OLR is not equal.")

    # One coefficient per EOF and per time step; every element is overwritten in the loop below.
    pc1 = np.empty(data.time.size)
    pc2 = np.empty(data.time.size)

    for idx, day in enumerate(data.time):
        olr_singleday = data.get_olr_for_date(day)
        # The EOF pair depends on the day of the year, so it is looked up separately for each date.
        doy = tools.calc_day_of_year(day)
        olr_vector = eofdata.eofdata_for_doy(doy).reshape_to_vector(olr_singleday)
        pc1[idx], pc2[idx] = regress_vector_onto_eofs(
            olr_vector,
            eofdata.eof1vector_for_doy(doy),
            eofdata.eof2vector_for_doy(doy))
    return pc.PCData(data.time, pc1, pc2)
예제 #2
0
def test_calc_day_of_year_array():
    """
    Checks :func:`tools.calc_day_of_year` for array input.

    Covers a plain run of January days, a year change after a leap year, and an array with nanosecond
    resolution. All failures are collected first and reported together by the final assert.
    """
    cases = (
        (np.array(["2019-01-01", "2019-01-02", "2019-01-03"], dtype="datetime64[D]"),
         [1, 2, 3],
         "Error in DOY calc for array"),
        # 2020 is a leap year, so 31 December is DOY 366 before the counter restarts.
        (np.array(["2020-12-30", "2020-12-31", "2021-01-01", "2021-01-02"], dtype="datetime64[D]"),
         [365, 366, 1, 2],
         "Error in DOY calc for array"),
        # Test work around to deal with datetime format in ns
        (np.array(["2019-01-01", "2019-01-02", "2019-01-03"], dtype="datetime64[ns]"),
         [1, 2, 3],
         "Error in DOY calc for array with format ns"),
    )

    errors = [
        message
        for dates, expected, message in cases
        if not np.all(tools.calc_day_of_year(dates) == np.array(expected))
    ]

    assert not errors, "errors occurred:\n{}".format("\n".join(errors))
예제 #3
0
def test_calc_day_of_year_scalar():
    """
    Checks :func:`tools.calc_day_of_year` for scalar ``numpy.datetime64`` input.

    Covers ordinary dates, the end of February and of December in a leap and a non-leap year, and dates
    given with nanosecond resolution. All failures are collected first and reported together by the
    final assert.
    """
    plain_msg = "Error in DOY calc for %s"
    ns_msg = "Error in DOY calc for %s with format ns"

    cases = (
        (np.datetime64("2019-01-01"), 1, plain_msg),
        (np.datetime64("2019-01-11"), 11, plain_msg),
        (np.datetime64("2019-02-28"), 31 + 28, plain_msg),
        (np.datetime64("2019-12-31"), 365, plain_msg),
        # test leap year
        (np.datetime64("2020-02-29"), 31 + 29, plain_msg),
        (np.datetime64("2020-12-31"), 366, plain_msg),
        # Test work around to deal with datetime format in ns
        (np.datetime64("2019-01-01", "ns"), 1, ns_msg),
        (np.datetime64("2019-01-11", "ns"), 11, ns_msg),
    )

    errors = []
    for date, expected, template in cases:
        target = tools.calc_day_of_year(date)
        if not target == expected:
            errors.append(template % str(date))

    assert not errors, "errors occurred:\n{}".format("\n".join(errors))
예제 #4
0
def calc_timeseries_agreement(ref_data: np.ndarray,
                              ref_time: np.ndarray,
                              data: np.ndarray,
                              time: np.ndarray,
                              exclude_doy366: bool = False,
                              do_print: bool = False) -> typing.Tuple:
    """
    Calculates comparison values of two time series.

    :param ref_data: The reference time series vector.
    :param ref_time: The time grid of the reference.
    :param data: The time series vector to validate.
    :param time: The time grid of the time series to validate. It has to be identical to the time grid
        of the reference.
    :param exclude_doy366: If True, the data for DOY 366 will not be considered in the statistics.
    :param do_print: If True, some statistical values will also be shown in the console.

    :return: A tuple containing, in this order:

        * the indices of the samples that entered the statistics (a tuple holding one index array, in the
          format returned by :func:`numpy.nonzero`),
        * the indices of the samples that were excluded (same format; empty if ``exclude_doy366`` is False),
        * the values returned by :func:`calc_vector_agreement` for the selected samples: correlation, mean of
          the differences, standard deviation of the differences, the vector of the differences, and the
          percentiles of the absolute differences for 68%, 95%, and 99%.

    :raises ValueError: If ``ref_time`` and ``time`` are not identical.
    """
    # np.array_equal also compares the shapes. An elementwise "==" would broadcast, so a time grid of
    # different length could either pass the check or fail with an unrelated broadcasting error.
    if not np.array_equal(ref_time, time):
        raise ValueError("Time series do not cover the same periods.")

    if exclude_doy366:
        if do_print:
            print("###### DOY 366 excluded")
        doys = tools.calc_day_of_year(ref_time)
        inds_used = np.nonzero(doys != 366)
        inds_not_used = np.nonzero(doys == 366)
        calc_ref_data = ref_data[inds_used]
        calc_data = data[inds_used]
    else:
        if do_print:
            print("##### Complete time series")
        calc_ref_data = ref_data
        calc_data = data
        # Mimic the tuple-of-index-arrays layout of np.nonzero, so that both branches return the same format.
        inds_used = (np.arange(0, ref_time.size, 1), )
        inds_not_used = (np.array([], dtype="int64"), )

    (corr, diff_mean, diff_std, diff_ts, diff_abs_percent68, diff_abs_percent95, diff_abs_percent99)\
        = calc_vector_agreement(calc_ref_data, calc_data, percentage=False, do_print=do_print)

    return (inds_used, inds_not_used, corr, diff_mean, diff_std, diff_ts,
            diff_abs_percent68, diff_abs_percent95, diff_abs_percent99)
def test_calc_timeseries_agreement():
    """
    Checks :func:`evalt.calc_timeseries_agreement`.

    Three aspects are covered: the statistics of a constant signal with added noise, the handling of
    DOY 366 samples with and without ``exclude_doy366``, and the rejection of non-matching time grids.
    All failures of the first two parts are collected and reported together by the final assert.
    """
    signal_time = np.arange("2010-01-01", "2020-12-31", dtype='datetime64[D]')
    data_time = np.arange("2010-01-01", "2020-12-31", dtype='datetime64[D]')

    n = signal_time.size

    signal = np.ones(n)
    # Fixed seed: the hard-coded reference values below are only valid for this noise realisation.
    np.random.seed(1000)
    noise = np.random.randn(n)
    data = signal + noise

    (_, _, _, diff_mean, diff_std, diff_ts,
     diff_abs_percent68, diff_abs_percent95, diff_abs_percent99) = evalt.calc_timeseries_agreement(
        signal,
        signal_time,
        data,
        data_time,
        exclude_doy366=False,
        do_print=False)

    errors = []

    if not np.allclose(diff_ts, noise):
        errors.append("Vector of differences not correct.")

    if not np.isclose(diff_mean, -0.016998885708167304):
        errors.append("Mean value of differences not correct.")

    # The noise is drawn from a standard normal distribution, so the std dev is close to 1.
    if not np.isclose(diff_std, 1.011669296908716):
        errors.append("Standard deviation of differences not correct.")

    if not np.isclose(diff_abs_percent68,
                      1.0021767602961196):  # approximately like stddev
        errors.append("68% percentile not correct.")

    if not np.isclose(diff_abs_percent95,
                      1.9723863150018495):  # approximately like 2*stddev
        errors.append("95% percentile not correct.")

    if not np.isclose(
            diff_abs_percent99, 2.6244012732344646
    ):  # smaller than 3*stddev, since 3*stddev corresponds to 99.9%
        errors.append("99% percentile not correct.")

    # Constant offset of 2 everywhere, except for a huge outlier on every DOY 366 sample. The mean of
    # the differences is exactly 2 only if those samples are really left out.
    noise = np.ones(n) * 2
    doys = tools.calc_day_of_year(signal_time)
    doy366_inds = np.nonzero(doys == 366)
    noise[doy366_inds] = 1000000
    data = signal + noise

    _, inds_not_used, _, diff_mean, *_ = evalt.calc_timeseries_agreement(
        signal,
        signal_time,
        data,
        data_time,
        exclude_doy366=True,
        do_print=False)

    if not np.all(inds_not_used[0] == doy366_inds[0]):
        errors.append("Wrong indices excluded.")
    if not diff_mean == 2.0:
        errors.append("Mean of difference is influenced by DOY 366 value.")

    _, inds_not_used, _, diff_mean, *_ = evalt.calc_timeseries_agreement(
        signal,
        signal_time,
        data,
        data_time,
        exclude_doy366=False,
        do_print=False)

    if inds_not_used[0].size != 0:
        errors.append("Indices excluded, which should not be the case.")
    if not diff_mean > 2.0:
        errors.append(
            "Mean of difference is not influenced by DOY 366 value, which should be the case."
        )

    # Time grids of the same length, but shifted by one day, have to be rejected.
    signal_time = np.arange("2010-01-01", "2020-12-31", dtype='datetime64[D]')
    data_time = np.arange("2010-01-02", "2021-01-01", dtype='datetime64[D]')
    with pytest.raises(ValueError):
        evalt.calc_timeseries_agreement(
            signal,
            signal_time,
            data,
            data_time,
            exclude_doy366=False,
            do_print=False)

    assert not errors, "errors occurred:\n{}".format("\n".join(errors))