def test_derive_edge_parameters():
    """
    Assert the result types of the derived edge parameters.

    ``derive_edge_parameters`` is expected to return two tuples (the low
    and high edge parameters) and one ``numpy.ndarray``.
    """
    src = np.linspace(-1, 1, 1000)
    ref = src * 0.5
    percentiles = np.linspace(0, 100, 100)
    perc_src = ml_percentile(src, percentiles)
    perc_ref = ml_percentile(ref, percentiles)
    a, b, c = derive_edge_parameters(
        src=src, ref=ref, perc_src=perc_src, perc_ref=perc_ref)
    # Use idiomatic isinstance checks with `and` instead of the original
    # `type(x) is T` comparisons combined by bitwise `&`; separate asserts
    # also give a clearer failure message.
    assert isinstance(a, tuple)
    assert isinstance(b, tuple)
    assert isinstance(c, np.ndarray)
def test_ml_percentile():
    """Verify the Matlab-compatible percentile implementation on a small array."""
    data = np.array([1, 1, 1, 2, 2, 2, 5, 5, 6, 10, 10, 10, 10])
    probs = [0, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 100]
    expected = [1.0, 1.0, 1.0, 1.1, 2.0, 2.0, 5.0,
                5.3, 8.4, 10., 10., 10., 10.]
    result = ml_percentile(data, probs)
    nptest.assert_almost_equal(result, expected)
def test_scale_edges():
    """
    Edge values must decrease to match a timeseries with smaller values.

    The reference is half the source, so the scaled edges of both tails
    should lie closer to zero than the corresponding source values.
    """
    scaled = np.linspace(-1, 1, 1000)
    src = np.linspace(-1, 1, 1000)
    ref = scaled * 0.5
    percentiles = np.linspace(0, 100, 100)
    edge_scaled = scale_edges(
        scaled=scaled,
        src=src,
        ref=ref,
        perc_src=ml_percentile(src, percentiles),
        perc_ref=ml_percentile(ref, percentiles),
    )
    # Low and high tails of the scaled series shrink relative to the source.
    assert np.all(np.abs(edge_scaled[:9]) < np.abs(src[:9]))
    assert np.all(np.abs(edge_scaled[990:]) < np.abs(src[990:]))
def test_interp_unique():
    """Test iterative filling of duplicated percentile values."""
    values = np.array([1, 1, 1, 2, 2, 2, 5, 5, 6, 10, 10, 10, 10])
    probs = [0, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 100]
    perc = ml_percentile(values, probs)
    filled = interp_uniq(perc)
    # Filling must preserve the number of percentile values.
    assert len(perc) == len(filled)
    expected = [1., 1.025, 1.05, 1.1, 1.55, 3.275, 5.,
                5.3, 8.4, 9.2, 9.6, 9.8, 10.]
    nptest.assert_almost_equal(filled, expected)
def test_unique_percentile_interpolation():
    """
    Test generation of unique percentile values by interpolation of order k.
    """
    values = np.array([1, 1, 1, 2, 2, 2, 5, 5, 6, 10, 10, 10, 10])
    probs = [0, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 100]
    perc = ml_percentile(values, probs)
    uniq = unique_percentiles_interpolate(perc, percentiles=probs)
    # Interpolation must keep the number of percentile values unchanged.
    assert len(perc) == len(uniq)
    expected = [1., 1.025, 1.05, 1.1, 2., 3.5, 5.,
                5.3, 8.4, 8.93333333, 9.46666667, 9.73333333, 10.]
    nptest.assert_almost_equal(uniq, expected)
def test_unique_percentiles_interpolate():
    """
    Test generation of unique percentile values by interpolation of order k.

    NOTE(review): this function was originally a second definition named
    ``test_unique_percentile_interpolation``; the redefinition shadowed the
    earlier test of the same name so pytest collected only one of them
    (flake8 F811). Renamed so both tests run.
    """
    arr1 = np.array([1, 1, 1, 2, 2, 2, 5, 5, 6, 10, 10, 10, 10])
    percentiles = [0, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 100]
    p = ml_percentile(arr1, percentiles)
    src_perc = unique_percentiles_interpolate(p, percentiles=percentiles)
    assert len(p) == len(src_perc)
    nptest.assert_almost_equal(
        src_perc,
        [1., 1.025, 1.05, 1.1, 2., 3.5, 5., 5.3, 8.4,
         8.93333333, 9.46666667, 9.73333333, 10.])
def test_unique_percentile_beta():
    """
    Test generation of unique percentile values by fitting the CDF of a
    beta distribution.
    """
    values = np.array([1, 1, 1, 2, 2, 2, 5, 5, 6, 10, 10, 10, 10])
    probs = [0, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 100]
    perc = ml_percentile(values, probs)
    uniq = unique_percentiles_beta(perc, percentiles=probs)
    # The beta fit must keep the number of percentile values unchanged.
    assert len(perc) == len(uniq)
    expected = [1., 1.00013305, 1.00371443, 1.08957949, 1.50096583,
                2.50963215, 4.18025716, 6.24205978, 8.16856852,
                9.45324093, 9.94854144, 9.99597975, 10.]
    nptest.assert_almost_equal(uniq, expected, decimal=5)
def test_unique_percentiles_beta():
    """
    Test generation of unique percentile values by fitting the CDF of a
    beta distribution.

    NOTE(review): this function was originally a second definition named
    ``test_unique_percentile_beta``; the redefinition shadowed the earlier
    test of the same name so pytest collected only one of them (flake8
    F811). Renamed so both tests run.
    """
    arr1 = np.array([1, 1, 1, 2, 2, 2, 5, 5, 6, 10, 10, 10, 10])
    percentiles = [0, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 100]
    p = ml_percentile(arr1, percentiles)
    src_perc = unique_percentiles_beta(p, percentiles=percentiles)
    assert len(p) == len(src_perc)
    nptest.assert_almost_equal(
        src_perc,
        [1., 1.00013305, 1.00371443, 1.08957949, 1.50096583, 2.50963215,
         4.18025716, 6.24205978, 8.16856852, 9.45324093, 9.94854144,
         9.99597975, 10.],
        decimal=5)
def cdf_beta_match(src, ref, minobs=20, lin_edge_scaling=True, nbins=100,
                   **kwargs):
    """
    CDF-match ``src`` to ``ref`` via a beta-distribution fit of the source
    percentiles.

    A set of percentile values is computed for both series; the source
    percentiles are made unique by fitting a beta distribution through
    their CDF, and the resulting bin edges are handed to
    ``gen_cdf_match`` for piecewise-linear scaling. Bin sizes are
    dynamically increased when a bin would contain fewer than ``minobs``
    observations, to avoid overfitting (based on Moesinger et al. (2020)).
    Linear edge scaling (Moesinger et al. (2020)) is enabled by default.

    Parameters
    ----------
    src : numpy.array
        Input dataset which will be scaled.
    ref : numpy.array
        ``src`` will be scaled to this dataset.
    minobs : int
        Minimum desired number of observations in a bin; ``None`` disables
        the dynamic resizing of the percentile bins.
    lin_edge_scaling : bool, optional
        Use linear edge scaling (passed through to ``gen_cdf_match``).
    nbins : int, optional
        Number of bins to use for estimation of the CDF.
    **kwargs : dict
        Keywords to be passed onto the ``gen_cdf_match()`` function.

    Returns
    -------
    CDF matched values : numpy.array
        Dataset ``src`` with the CDF of ``ref``.
    """
    percentiles = np.linspace(0, 100, nbins)
    if minobs is not None:
        # Grow bins that would hold fewer than `minobs` observations.
        percentiles = utils.resize_percentiles(src, percentiles, minobs)

    # Bring both series to a common length by resampling the shorter one
    # onto an evenly spaced percentile grid.
    n_src = len(src)
    n_ref = len(ref)
    if n_src != n_ref:
        max_obs = max(n_src, n_ref)
        d_perc = np.arange(max_obs, dtype="float") / (max_obs - 1) * 100
        if n_src < n_ref:
            src = utils.ml_percentile(src, d_perc)
        else:
            ref = utils.ml_percentile(ref, d_perc)

    # Percentiles computed with the Matlab-compatible method.
    perc_src = utils.ml_percentile(src, percentiles)
    perc_ref = utils.ml_percentile(ref, percentiles)

    # A constant percentile vector cannot be made unique via the beta fit;
    # warn and fall through with the raw percentiles instead.
    if np.unique(perc_src).size == 1:
        warn(
            "There is only one percentile value on which the scaling is based")
    else:
        perc_src = utils.unique_percentiles_beta(perc_src,
                                                 percentiles=percentiles)

    return gen_cdf_match(
        src,
        perc_src,
        perc_ref,
        lin_edge_scaling=lin_edge_scaling,
        ref=ref,
        **kwargs,
    )