Beispiel #1
0
def test_derive_edge_parameters():
    """
    Check the types of the three values returned by
    derive_edge_parameters: the first two must be plain tuples and the
    third a numpy array.
    """
    source = np.linspace(-1, 1, 1000)
    reference = source * 0.5  # reference is the source at half amplitude
    bin_edges = np.linspace(0, 100, 100)

    first, second, third = derive_edge_parameters(
        src=source,
        ref=reference,
        perc_src=ml_percentile(source, bin_edges),
        perc_ref=ml_percentile(reference, bin_edges),
    )

    # exact type checks (subclasses are deliberately not accepted)
    assert type(first) is tuple
    assert type(second) is tuple
    assert type(third) is np.ndarray
Beispiel #2
0
def test_ml_percentile():
    """
    Compare ml_percentile (the Matlab-style percentile implementation)
    against known reference values for a small array with repeated entries.
    """
    samples = np.array([1, 1, 1, 2, 2, 2, 5, 5, 6, 10, 10, 10, 10])
    levels = [0, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 100]
    expected = [
        1.0, 1.0, 1.0, 1.1, 2.0, 2.0, 5.0, 5.3, 8.4, 10., 10., 10., 10.
    ]

    nptest.assert_almost_equal(ml_percentile(samples, levels), expected)
Beispiel #3
0
def test_ml_percentile():
    """
    Verify the Matlab-style percentile computation on a fixed input.

    One expected value is listed per requested percentile level.
    """
    values = np.array([1, 1, 1, 2, 2, 2, 5, 5, 6, 10, 10, 10, 10])
    requested = [0, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 100]
    reference = [1.0, 1.0, 1.0, 1.1, 2.0, 2.0, 5.0,
                 5.3, 8.4, 10., 10., 10., 10.]

    computed = ml_percentile(values, requested)
    nptest.assert_almost_equal(computed, reference)
Beispiel #4
0
def test_scale_edges():
    """
    Check that scale_edges shrinks the edge values when the reference
    series has smaller values than the source.

    The first 9 and the last 10 elements of the result must be strictly
    smaller in magnitude than the corresponding source elements.
    """
    # `scaled` and `src` hold equal values but are separate arrays
    scaled = np.linspace(-1, 1, 1000)
    src = np.linspace(-1, 1, 1000)
    ref = scaled * 0.5
    bin_edges = np.linspace(0, 100, 100)

    result = scale_edges(
        scaled=scaled,
        src=src,
        ref=ref,
        perc_src=ml_percentile(src, bin_edges),
        perc_ref=ml_percentile(ref, bin_edges),
    )

    lower_shrunk = np.abs(result[:9]) < np.abs(src[:9])
    upper_shrunk = np.abs(result[990:]) < np.abs(src[990:])

    assert np.all(lower_shrunk)
    assert np.all(upper_shrunk)
Beispiel #5
0
def test_interp_unique():
    """
    Check interp_uniq on percentile values that contain repeats: the
    output keeps the input length and matches known reference values.
    """
    samples = np.array([1, 1, 1, 2, 2, 2, 5, 5, 6, 10, 10, 10, 10])
    levels = [0, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 100]
    raw_perc = ml_percentile(samples, levels)

    filled = interp_uniq(raw_perc)
    assert len(raw_perc) == len(filled)

    expected = [
        1., 1.025, 1.05, 1.1, 1.55, 3.275, 5., 5.3, 8.4, 9.2, 9.6, 9.8, 10.
    ]
    nptest.assert_almost_equal(filled, expected)
Beispiel #6
0
def test_interp_unique():
    """
    Test the iterative filling performed by interp_uniq.

    The result must have as many entries as the percentile array passed
    in and must equal the stored reference values.
    """
    data = np.array([1, 1, 1, 2, 2, 2, 5, 5, 6, 10, 10, 10, 10])
    requested = [0, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 100]
    perc_in = ml_percentile(data, requested)
    perc_out = interp_uniq(perc_in)

    assert len(perc_in) == len(perc_out)

    reference = [1., 1.025, 1.05, 1.1, 1.55, 3.275, 5.,
                 5.3, 8.4, 9.2, 9.6, 9.8, 10.]
    nptest.assert_almost_equal(perc_out, reference)
Beispiel #7
0
def test_unique_percentile_interpolation():
    """
    Check unique_percentiles_interpolate, which generates unique
    percentile values by interpolation: the output keeps the input
    length and matches known reference values.
    """
    samples = np.array([1, 1, 1, 2, 2, 2, 5, 5, 6, 10, 10, 10, 10])
    levels = [0, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 100]
    raw_perc = ml_percentile(samples, levels)

    unique_perc = unique_percentiles_interpolate(raw_perc, percentiles=levels)
    assert len(raw_perc) == len(unique_perc)

    expected = [
        1., 1.025, 1.05, 1.1,
        2., 3.5, 5., 5.3,
        8.4, 8.93333333, 9.46666667, 9.73333333,
        10.,
    ]
    nptest.assert_almost_equal(unique_perc, expected)
Beispiel #8
0
def test_unique_percentile_interpolation():
    """
    Test generation of unique percentile values via
    unique_percentiles_interpolate.

    The returned array must be as long as the input percentiles and
    agree with the stored reference values.
    """
    data = np.array([1, 1, 1, 2, 2, 2, 5, 5, 6, 10, 10, 10, 10])
    requested = [0, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 100]
    perc_in = ml_percentile(data, requested)
    perc_out = unique_percentiles_interpolate(perc_in, percentiles=requested)

    assert len(perc_in) == len(perc_out)

    reference = [
        1., 1.025, 1.05, 1.1, 2., 3.5, 5., 5.3, 8.4,
        8.93333333, 9.46666667, 9.73333333, 10.
    ]
    nptest.assert_almost_equal(perc_out, reference)
Beispiel #9
0
def test_unique_percentile_beta():
    """
    Check unique_percentiles_beta, which generates unique percentile
    values by fitting the CDF of a beta distribution: the output keeps
    the input length and matches reference values to 5 decimals.
    """
    samples = np.array([1, 1, 1, 2, 2, 2, 5, 5, 6, 10, 10, 10, 10])
    levels = [0, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 100]
    raw_perc = ml_percentile(samples, levels)

    fitted = unique_percentiles_beta(raw_perc, percentiles=levels)
    assert len(raw_perc) == len(fitted)

    expected = [
        1., 1.00013305, 1.00371443, 1.08957949,
        1.50096583, 2.50963215, 4.18025716, 6.24205978,
        8.16856852, 9.45324093, 9.94854144, 9.99597975,
        10.,
    ]
    nptest.assert_almost_equal(fitted, expected, decimal=5)
Beispiel #10
0
def test_unique_percentile_beta():
    """
    Test generation of unique percentile values via
    unique_percentiles_beta (beta-distribution CDF fit).

    The result must be as long as the input percentiles and agree with
    the stored reference values up to the fifth decimal.
    """
    data = np.array([1, 1, 1, 2, 2, 2, 5, 5, 6, 10, 10, 10, 10])
    requested = [0, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 100]
    perc_in = ml_percentile(data, requested)
    perc_out = unique_percentiles_beta(perc_in, percentiles=requested)

    assert len(perc_in) == len(perc_out)

    reference = [1., 1.00013305, 1.00371443, 1.08957949, 1.50096583,
                 2.50963215, 4.18025716, 6.24205978, 8.16856852,
                 9.45324093, 9.94854144, 9.99597975, 10.]
    nptest.assert_almost_equal(perc_out, reference, decimal=5)
Beispiel #11
0
def cdf_beta_match(src,
                   ref,
                   minobs=20,
                   lin_edge_scaling=True,
                   nbins=100,
                   **kwargs):
    """
    Scale ``src`` to ``ref`` by CDF matching, using percentiles of the
    source that were made unique by fitting a beta distribution.

    The percentile bins are built from ``nbins`` evenly spaced values
    between 0 and 100. Unless ``minobs`` is None they are resized with
    ``utils.resize_percentiles`` so that bins do not hold too few
    observations (which would lead to overfitting). Based on
    Moesinger et al. (2020).

    The resulting percentile values are handed to ``gen_cdf_match``,
    which scales the source to the reference by linear interpolation
    between each pair of bin edges.

    Parameters
    ----------
    src: numpy.array
        input dataset which will be scaled
    ref: numpy.array
        src will be scaled to this dataset
    minobs : int or None
        Minimum desired number of observations in a bin. If None, the
        percentile bins are not resized.
    lin_edge_scaling : bool, optional
        Forwarded unchanged to gen_cdf_match(). Enabled by default to
        use the linear edge scaling method described in Moesinger et
        al. (2020).
    nbins: int, optional
        Number of bins to use for estimation of the CDF
    ** kwargs: dict
        keywords to be passed onto the gen_cdf_match() function

    Returns
    -------
    CDF matched values: numpy.array
        dataset src with CDF as ref
    """
    percentiles = np.linspace(0, 100, nbins)
    if minobs is not None:
        percentiles = utils.resize_percentiles(src, percentiles, minobs)

    # If the inputs differ in length, replace the shorter one by its
    # percentiles evaluated at as many points as the longer one has.
    n_src = len(src)
    n_ref = len(ref)
    if n_src != n_ref:
        n_max = max(n_src, n_ref)
        dense_perc = np.arange(n_max, dtype="float") / (n_max - 1) * 100

        if n_src < n_ref:
            src = utils.ml_percentile(src, dense_perc)
        else:
            ref = utils.ml_percentile(ref, dense_perc)

    # percentiles of both datasets, computed with the matlab method
    perc_src = utils.ml_percentile(src, percentiles)
    perc_ref = utils.ml_percentile(ref, percentiles)

    # Fit a beta distribution through the source percentiles; when they
    # all share a single value only a warning is issued and the
    # percentiles are used as they are.
    if np.unique(perc_src).size != 1:
        perc_src = utils.unique_percentiles_beta(perc_src,
                                                 percentiles=percentiles)
    else:
        warn(
            "There is only one percentile value on which the scaling is based")

    matched = gen_cdf_match(src,
                            perc_src,
                            perc_ref,
                            lin_edge_scaling=lin_edge_scaling,
                            ref=ref,
                            **kwargs)
    return matched