def test_scatter_add(backend):
    """
    Check that ``scatter_add`` adds the slices of the source array to the
    target array at the given indices, along axis 0 and along axis 1.
    """
    # Indices 0 and 2 receive a contribution, index 1 stays untouched.
    expected_values = (1.0, 0.0, 1.0)

    # Scatter rows: the source has one row per index.
    target = zeros(backend, (3, 3))
    source = ones(backend, (2, 3))
    indices = to_array(backend, [0, 2])
    summed = scatter_add(backend, target, indices, source, 0)
    for row, expected in enumerate(expected_values):
        assert np.isclose(summed[row, 0], expected)

    # Scatter columns: the source has one column per index.
    target = zeros(backend, (3, 3))
    source = ones(backend, (3, 2))
    summed = scatter_add(backend, target, indices, source, 1)
    for column, expected in enumerate(expected_values):
        assert np.isclose(summed[0, column], expected)
def add(y_pdf_1, bins_1, y_pdf_2, bins_2, bins_out, bin_axis=1):
    """
    Calculate the discretized PDF of the sum of two random variables
    represented by their respective discretized PDFs.

    Each bin of the first PDF is represented by a single point drawn at
    random inside the bin. The bin centers of the second PDF are shifted by
    that point and the joint probability mass is accumulated into the bins
    of ``bins_out``. Because of the random offsets the result is not
    deterministic.

    Args:
        y_pdf_1: The discretized PDF of the first random variable.
        bins_1: The bin boundaries corresponding to 'y_pdf_1'.
        y_pdf_2: The discretized PDF of the second random variable.
        bins_2: The bin boundaries corresponding to 'y_pdf_2'.
        bins_out: The bins boundaries for the resulting discretized PDF.
        bin_axis: The dimension along which the probabilities are oriented.
            Ignored (set to 0) when ``y_pdf_1`` has rank 1.

    Return:
        A tensor containing the discretized PDF corresponding to the sum of
        the two given PDFs. It has the shape of ``y_pdf_1`` with the size
        along ``bin_axis`` replaced by the number of output bins, and is
        passed through ``normalize`` before being returned.
    """
    if len(y_pdf_1.shape) == 1:
        bin_axis = 0

    xp = get_array_module(y_pdf_1)

    # Number of bins of the first PDF (one less than the bin boundaries).
    n_bins = numel(bins_1) - 1

    # Convert the densities to per-bin probability masses by multiplying
    # with the bin widths, broadcast along 'bin_axis'.
    dx_1 = bins_1[1:] - bins_1[:-1]
    shape_1 = [1] * len(y_pdf_1.shape)
    shape_1[bin_axis] = n_bins
    dx_1 = dx_1.reshape(shape_1)
    p_1 = y_pdf_1 * dx_1

    bins_2_c = 0.5 * (bins_2[1:] + bins_2[:-1])
    dx_2 = bins_2[1:] - bins_2[:-1]
    shape_2 = [1] * len(y_pdf_2.shape)
    shape_2[bin_axis] = numel(bins_2) - 1
    dx_2 = dx_2.reshape(shape_2)
    p_2 = y_pdf_2 * dx_2

    out_shape = list(y_pdf_1.shape)
    out_shape[bin_axis] = numel(bins_out) - 1
    p_out = zeros(xp, out_shape, like=y_pdf_1)

    rank = len(y_pdf_1.shape)
    selection = [slice(0, None)] * rank

    # One random relative position per bin of the first PDF.
    # NOTE(review): presumably uniform on [0, 1) -- confirm against
    # 'sample_uniform'.
    offsets = sample_uniform(xp, (n_bins,), like=bins_2)
    for i in range(n_bins):
        d_b = bins_1[i + 1] - bins_1[i]
        b = bins_1[i] + offsets[i] * d_b
        # Select with a length-1 slice instead of an integer so that the
        # bin axis is kept. With an integer index the axis is dropped and
        # the product below broadcasts against the wrong axis of 'p_2'
        # whenever the input has rank > 1 and 'bin_axis' is not 0.
        selection[bin_axis] = slice(i, i + 1)
        bins = bins_2_c + b
        probs = p_1[tuple(selection)] * p_2
        # NOTE(review): assumes 'digitize' follows the numpy.digitize
        # convention, so that subtracting 1 yields zero-based bin indices.
        inds = digitize(xp, bins, bins_out) - 1
        p_out = scatter_add(xp, p_out, inds, probs, bin_axis)

    return normalize(p_out, bins_out, bin_axis=bin_axis)
def sample_posterior(y_pred, bins, n_samples=1, bin_axis=1):
    """
    Draw samples from the posterior distribution described by a predicted,
    binned PDF.

    Random values are drawn with ``sample_uniform`` and mapped through the
    piece-wise linear inverse of the CDF obtained from ``posterior_cdf``.

    Args:
        y_pred: A rank-k tensor containing the predicted bin-probabilities
            along the axis specified by ``bin_axis``.
        bins: The bin boundaries corresponding to the predicted bin
            probabilities.
        n_samples: How many samples to generate for each prediction.
        bin_axis: The axis in y_pred along which the predicted bin
            probabilities are located. Ignored (set to 0) when ``y_pred``
            has rank 1.

    Returns:
        A rank-k tensor with the values along ``bin_axis`` replaced by
        samples of the posterior distribution.
    """
    if len(y_pred.shape) == 1:
        bin_axis = 0
    xp = get_array_module(y_pred)
    rank = len(y_pred.shape)

    cdf_values = posterior_cdf(y_pred, bins, bin_axis=bin_axis)

    sample_shape = list(cdf_values.shape)
    sample_shape[bin_axis] = n_samples
    results = zeros(xp, sample_shape, like=y_pred)

    def cdf_at(k):
        # CDF at bin boundary k; the length-1 slice keeps 'bin_axis' so the
        # value broadcasts against the samples.
        index = [slice(0, None)] * rank
        index[bin_axis] = slice(k, k + 1)
        return cdf_values[tuple(index)]

    cdf_lo = cdf_at(0)
    edge_lo = bins[0]
    uniform = as_type(xp, sample_uniform(xp, tuple(sample_shape)), cdf_values)

    for k in range(1, len(bins)):
        cdf_hi = cdf_at(k)
        edge_hi = bins[k]

        # 1 where the drawn value lies in (cdf_lo, cdf_hi], 0 elsewhere.
        above_lo = cdf_lo < uniform
        up_to_hi = cdf_hi >= uniform
        inside = as_type(xp, above_lo * up_to_hi, cdf_lo)

        # Linear interpolation between the two bin boundaries. The divisor
        # is 1 outside the mask so values set earlier are left unchanged.
        results += edge_lo * (cdf_hi - uniform) * inside
        results += edge_hi * (uniform - cdf_lo) * inside
        results /= inside * (cdf_hi - cdf_lo) + (1.0 - inside)

        edge_lo = edge_hi
        cdf_lo = cdf_hi

    # Values drawn above the last CDF value map to the last bin boundary.
    beyond = as_type(xp, cdf_hi < uniform, cdf_hi)
    results += beyond * edge_hi
    return results
def probability_less_than(y_pred, quantiles, y, quantile_axis=1):
    """
    Estimate the probability that the predicted variable is less than a
    given threshold value ``y`` from a tensor of predicted quantiles
    ``y_pred``.

    The predicted quantiles are turned into a piece-wise linear CDF by
    ``cdf``, which is then interpolated to the threshold value. Thresholds
    above the last CDF node yield 1; thresholds that are not above the
    first CDF node yield 0.

    Args:
        y_pred: A rank-k tensor containing the predicted quantiles along the
            axis specified by ``quantile_axis``.
        quantiles: The quantile fractions corresponding to the predicted
            quantiles.
        y: The threshold value.
        quantile_axis: The axis in y_pred along which the predicted
            quantiles are found. Ignored (set to 0) when ``y_pred`` has
            rank 1.

    Returns:
        A rank-(k-1) tensor containing for each set of predicted quantiles
        the interpolated CDF value at ``y``, i.e. the estimated probability
        of the value being below the given threshold.
    """
    if len(y_pred.shape) == 1:
        quantile_axis = 0
    xp = get_array_module(y_pred)
    rank = len(y_pred.shape)
    x_cdf, y_cdf = cdf(y_pred, quantiles, quantile_axis=quantile_axis)

    reduced_shape = list(x_cdf.shape)
    del reduced_shape[quantile_axis]
    probabilities = zeros(xp, reduced_shape, like=y_pred)

    def node(k):
        # x-values of the k-th CDF node; the integer index drops the
        # quantile axis so the result matches 'probabilities'.
        index = [slice(0, None)] * rank
        index[quantile_axis] = k
        return x_cdf[tuple(index)]

    p_lo = y_cdf[0]
    x_lo = node(0)
    for k in range(1, len(y_cdf)):
        p_hi = y_cdf[k]
        x_hi = node(k)

        # 1 where the threshold lies in (x_lo, x_hi], 0 elsewhere.
        inside = as_type(xp, (x_lo < y) * (x_hi >= y), x_lo)

        # Linear interpolation of the CDF; the divisor is 1 outside the mask
        # so values set in earlier segments are left unchanged.
        probabilities += p_lo * (x_hi - y) * inside
        probabilities += p_hi * (y - x_lo) * inside
        probabilities /= inside * (x_hi - x_lo) + (1.0 - inside)

        p_lo, x_lo = p_hi, x_hi

    # Thresholds beyond the last CDF node have probability 1.
    beyond = as_type(xp, x_hi < y, x_hi)
    probabilities += beyond
    return probabilities
def sample_posterior(y_pred, quantiles, n_samples=1, quantile_axis=1):
    """
    Draw samples from the posterior distribution described by the predicted
    quantiles.

    Random values are drawn with ``sample_uniform`` and mapped through the
    piece-wise linear inverse of the CDF returned by ``cdf``.

    Args:
        y_pred: A rank-k tensor containing the predicted quantiles along the
            axis specified by ``quantile_axis``.
        quantiles: The quantile fractions corresponding to the predicted
            quantiles.
        n_samples: How many samples to generate for each prediction.
        quantile_axis: The axis in y_pred along which the predicted
            quantiles are found. Ignored (set to 0) when ``y_pred`` has
            rank 1.

    Returns:
        A rank-k tensor with the values along ``quantile_axis`` replaced by
        samples of the posterior distribution.
    """
    if len(y_pred.shape) == 1:
        quantile_axis = 0
    xp = get_array_module(y_pred)
    rank = len(y_pred.shape)
    x_cdf, y_cdf = cdf(y_pred, quantiles, quantile_axis=quantile_axis)

    sample_shape = list(y_pred.shape)
    sample_shape[quantile_axis] = n_samples
    uniform = as_type(xp, sample_uniform(xp, tuple(sample_shape)), y_cdf)
    results = zeros(xp, uniform.shape, like=y_pred)

    def node(k):
        # x-values of the k-th CDF node; the length-1 slice keeps the
        # quantile axis so the values broadcast against the samples.
        index = [slice(0, None)] * rank
        index[quantile_axis] = slice(k, k + 1)
        return x_cdf[tuple(index)]

    p_lo = y_cdf[0]
    x_lo = node(0)
    for k in range(1, len(y_cdf)):
        p_hi = y_cdf[k]
        x_hi = node(k)

        # 1 where the drawn value lies in (p_lo, p_hi], 0 elsewhere.
        inside = as_type(xp, (uniform > p_lo) * (uniform <= p_hi), p_lo)

        # Linear interpolation of the inverse CDF; the divisor is 1 outside
        # the mask so values set in earlier segments are left unchanged.
        results += (x_lo * (p_hi - uniform)) * inside
        results += (x_hi * (uniform - p_lo)) * inside
        results /= inside * (p_hi - p_lo) + (1.0 - inside)

        p_lo, x_lo = p_hi, x_hi

    return results
def quantile_loss(y_pred, quantiles, y_true, quantile_axis=1):
    """
    Calculate the quantile loss for all predicted quantiles.

    Args:
        y_pred: A k-tensor containing the predicted quantiles along the
            axis specified by ``quantile_axis``.
        quantiles: A vector or list containing the quantile fractions
            corresponding to the predicted quantiles.
        y_true: A tensor of rank k-1 containing the corresponding true
            values. It must be reshapable to the shape of ``y_pred`` with
            the quantile axis set to 1.
        quantile_axis: The axis along which ``y_pred`` contains the
            predicted quantiles. Ignored (set to 0) when ``y_pred`` has
            rank 1.

    Returns:
        A tensor with the shape of ``y_pred - y_true`` containing the
        element-wise (unreduced) quantile loss.

    Raises:
        InvalidDimensionException: When ``y_true`` cannot be reshaped into
            the expected shape. The underlying error is attached as the
            cause of the exception.
    """
    if len(y_pred.shape) == 1:
        quantile_axis = 0

    xp = get_array_module(y_pred)
    n_dims = len(y_pred.shape)

    # Give 'y_true' a length-1 quantile axis so it broadcasts against
    # 'y_pred'.
    y_true_shape = list(y_pred.shape)
    y_true_shape[quantile_axis] = 1
    try:
        y_true = reshape(xp, y_true, y_true_shape)
    except Exception as error:
        # Chain explicitly so the original reshape failure is reported as
        # the direct cause of the dimension error.
        raise InvalidDimensionException(
            "Could not reshape 'y_true' argument into expected shape "
            f"{y_true_shape}."
        ) from error

    # Align the quantile fractions with the quantile axis of 'y_pred'.
    quantiles = to_array(xp, quantiles)
    quantiles_shape = [1] * n_dims
    quantiles_shape[quantile_axis] = len(quantiles)
    quantiles = reshape(xp, quantiles, quantiles_shape)

    dy = y_pred - y_true
    loss = zeros(xp, dy.shape, like=y_pred)
    # 1 where the prediction exceeds the true value, 0 elsewhere.
    mask = as_type(xp, dy > 0.0, dy)
    # Over-prediction is weighted with (1 - tau), under-prediction with tau.
    loss += mask * ((1.0 - quantiles) * dy)
    loss += -(1.0 - mask) * (quantiles * dy)
    return loss
def test_zeros(backend):
    """
    Check that ``zeros`` yields an array whose entry compares equal to 0.
    """
    array = zeros(backend, (1, 1))
    assert array[0, 0] == 0.0
def crps(y_pred, y_true, quantiles, quantile_axis=1):
    r"""
    Compute the Continuous Ranked Probability Score (CRPS) for given
    predicted quantiles.

    The approximate posterior CDF is obtained from the predicted quantiles
    in :code:`y_pred` via ``cdf``. The integral below is then approximated
    with the trapezoidal rule over the nodes of that CDF:

    .. math::
        CRPS(\mathbf{y}, x) = \int_{-\infty}^\infty (F_{x | \mathbf{y}}(x')
        - \mathrm{1}_{x < x'})^2 \: dx'

    Args:
        y_pred: Tensor containing the predicted quantiles along the axis
            specified by ``quantile_axis``.
        y_true: Array containing the true point values.
        quantiles: 1D array containing the quantile fractions corresponding
            to the predicted quantiles.
        quantile_axis: The axis of ``y_pred`` along which the quantiles are
            found. Ignored (set to 0) when ``y_pred`` has rank 1.

    Returns:
        Tensor of rank :math:`k - 1` containing the CRPS values for each of
        the predictions in ``y_pred``.
    """
    if len(y_pred.shape) == 1:
        quantile_axis = 0
    xp = get_array_module(y_pred)
    rank = len(y_pred.shape)
    x_cdf, y_cdf = cdf(y_pred, quantiles, quantile_axis=quantile_axis)

    # Give the true values a length-1 quantile axis so that they broadcast
    # against the CDF nodes.
    broadcast_shape = list(x_cdf.shape)
    broadcast_shape[quantile_axis] = 1
    y_true = reshape(xp, to_array(xp, y_true), broadcast_shape)

    # Indicator function: 1 at CDF nodes above the true value, 0 elsewhere.
    above_truth = as_type(xp, x_cdf > y_true, y_pred)
    indicator = ones(xp, x_cdf.shape, like=y_pred) * above_truth

    reduced_shape = list(x_cdf.shape)
    del reduced_shape[quantile_axis]
    integral = zeros(xp, reduced_shape, like=y_pred)

    def at_node(values, k):
        # Entries of 'values' at CDF node k; the integer index drops the
        # quantile axis so the result matches 'integral'.
        index = [slice(0, None)] * rank
        index[quantile_axis] = k
        return values[tuple(index)]

    p_lo = y_cdf[0]
    x_lo = at_node(x_cdf, 0)
    ind_lo = at_node(indicator, 0)
    for k in range(1, len(y_cdf)):
        p_hi = y_cdf[k]
        x_hi = at_node(x_cdf, k)
        ind_hi = at_node(indicator, k)

        # Trapezoidal rule applied to the squared difference between the
        # indicator and the CDF on the segment [x_lo, x_hi].
        segment = (ind_lo - p_lo) ** 2 + (ind_hi - p_hi) ** 2
        width = x_hi - x_lo
        segment *= 0.5 * width
        integral += segment

        p_lo, x_lo, ind_lo = p_hi, x_hi, ind_hi

    return integral
def cdf(y_pred, quantiles, quantile_axis=1):
    """
    Calculates the cumulative distribution function (CDF) from predicted
    quantiles.

    The predicted quantiles form the interior nodes of the CDF. Two outer
    nodes with CDF values 0 and 1 are added by extrapolating linearly from
    the first two and the last two quantiles, respectively. At least two
    quantiles are therefore required.

    Args:
        y_pred: Array containing a range of predicted quantiles. The array
            is expected to contain the quantiles along the axis given by
            ``quantile_axis.``
        quantiles: Array containing quantile fraction corresponding to the
            the predicted quantiles.
        quantile_axis: The index of the axis of the ``y_pred`` array, along
            which the quantiles are found. Ignored (set to 0) when
            ``y_pred`` has rank 1.

    Returns:
        Tuple ``(x_cdf, y_cdf)`` of x and corresponding y-values of the CDF
        corresponding to quantiles given by ``y_pred``. Both contain two
        more nodes than there are quantiles.

    Raises:
        InvalidArrayTypeException: When the data is provided neither as
            numpy array nor as torch tensor.
            NOTE(review): presumably raised by ``get_array_module`` --
            confirm.
        InvalidDimensionException: When the provided predicted quantiles do
            not match the provided number of quantiles.
    """
    if len(y_pred.shape) == 1:
        quantile_axis = 0
    if y_pred.shape[quantile_axis] != len(quantiles):
        raise InvalidDimensionException(
            "Dimensions of the provided array 'y_pred' do not match the "
            "provided number of quantiles."
        )

    xp = get_array_module(y_pred)

    # y-values of the CDF: the quantile fractions enclosed by 0 and 1.
    y_cdf = quantiles
    y_cdf = concatenate(
        xp, [zeros(xp, 1, like=y_cdf), y_cdf, ones(xp, 1, like=y_cdf)], 0
    )

    selection = [slice(0, None)] * len(y_pred.shape)

    # Left tail: slope between the first two quantiles.
    selection_c = copy(selection)
    selection_c[quantile_axis] = 0
    selection_c = tuple(selection_c)
    selection_r = copy(selection)
    selection_r[quantile_axis] = 1
    selection_r = tuple(selection_r)
    dx = y_pred[selection_r] - y_pred[selection_c]
    dx /= quantiles[1] - quantiles[0]
    # The factor 2.0 places the outer node at twice the distance that a
    # plain linear extrapolation down to a CDF value of 0 would give.
    x_cdf_l = y_pred[selection_c] - 2.0 * quantiles[0] * dx
    x_cdf_l = expand_dims(xp, x_cdf_l, quantile_axis)

    # Right tail: slope between the last two quantiles.
    selection_l = copy(selection)
    selection_l[quantile_axis] = -2
    selection_l = tuple(selection_l)
    selection_c = copy(selection)
    selection_c[quantile_axis] = -1
    selection_c = tuple(selection_c)
    dx = y_pred[selection_c] - y_pred[selection_l]
    dx /= quantiles[-1] - quantiles[-2]
    x_cdf_r = y_pred[selection_c] + 2.0 * (1.0 - quantiles[-1]) * dx
    x_cdf_r = expand_dims(xp, x_cdf_r, quantile_axis)

    x_cdf = concatenate(xp, [x_cdf_l, y_pred, x_cdf_r], quantile_axis)
    return x_cdf, y_cdf