def calculate_samples(y_pred):
    """
    Draw posterior samples for a tensor of predicted quantiles.

    ``self`` and ``n_samples`` are free variables taken from the
    enclosing scope.

    Args:
        y_pred: Tensor containing the predicted quantiles.

    Return:
        The result of ``qq.sample_posterior`` for ``y_pred``.
    """
    xp = get_array_module(y_pred)
    # Convert the quantile fractions to the same array type as the prediction.
    taus = to_array(xp, self.quantiles, like=y_pred)
    return qq.sample_posterior(
        y_pred,
        taus,
        n_samples=n_samples,
        quantile_axis=self.quantile_axis,
    )
def test_sample_gaussian(backend):
    """
    Check that ``sample_gaussian`` returns an array whose array module
    is the backend that was requested.
    """
    shape = (10, )
    drawn = sample_gaussian(backend, shape)
    assert get_array_module(drawn) == backend
def calculate_prob(y_pred):
    """
    Evaluate ``qq.probability_less_than`` for a tensor of predicted
    quantiles.

    ``self`` and the threshold ``y`` are free variables taken from the
    enclosing scope.

    Args:
        y_pred: Tensor containing the predicted quantiles.

    Return:
        The result of ``qq.probability_less_than`` for ``y_pred`` and ``y``.
    """
    xp = get_array_module(y_pred)
    # Convert the quantile fractions to the same array type as the prediction.
    taus = to_array(xp, self.quantiles, like=y_pred)
    return qq.probability_less_than(
        y_pred,
        taus,
        y,
        quantile_axis=self.quantile_axis,
    )
def posterior_quantiles(y_pred, bins, quantiles, bin_axis=1):
    """
    Calculate posterior quantiles from predicted PDFs.

    For each requested quantile fraction the widths of all bins whose CDF
    value does not exceed the fraction are summed and added to the first
    bin boundary.

    Args:
        y_pred: Tensor containing the predicted PDFs.
        bins: The bin boundaries corresponding to the predictions.
        quantiles: Iterable of quantile fractions to compute.
        bin_axis: The index of the tensor axis which contains the
            predictions for each bin. Forced to 0 for rank-1 input.

    Return:
        Tensor in which the values along ``bin_axis`` are replaced by one
        entry per element of ``quantiles``.
    """
    rank = len(y_pred.shape)
    if rank == 1:
        bin_axis = 0
    _check_dimensions(y_pred.shape[bin_axis], len(bins))
    xp = get_array_module(y_pred)

    y_cdf = posterior_cdf(y_pred, bins, bin_axis=bin_axis)

    # Bin widths, padded on the left so that they have one entry per bin
    # boundary, and shaped to broadcast along ``bin_axis``.
    widths_shape = [1] * rank
    widths_shape[bin_axis] = numel(bins)
    widths = pad_zeros_left(xp, bins[1:] - bins[:-1], 1, 0)
    widths = reshape(xp, widths, widths_shape)

    def _single_quantile(q):
        # Select every CDF node that lies at or below the fraction ``q``.
        below = as_type(xp, y_cdf <= q, y_cdf)
        value = bins[0] + xp.sum(below * widths, bin_axis)
        return expand_dims(xp, value, bin_axis)

    return concatenate(xp, [_single_quantile(q) for q in quantiles], bin_axis)
def calculate_quantiles(y_pred, bins):
    """
    Evaluate ``qd.posterior_quantiles`` for a tensor of predictions.

    ``self`` and ``quantiles`` are free variables taken from the
    enclosing scope.

    Args:
        y_pred: Tensor containing the predictions.
        bins: The bin boundaries corresponding to ``y_pred``.

    Return:
        The result of ``qd.posterior_quantiles``.
    """
    xp = get_array_module(y_pred)
    # Convert the bin boundaries to the same array type as the prediction.
    bin_edges = to_array(xp, bins, like=y_pred)
    return qd.posterior_quantiles(
        y_pred,
        bin_edges,
        quantiles,
        bin_axis=self.bin_axis,
    )
def posterior_mean(y_pdf, bins, bin_axis=1):
    """
    Calculate the posterior mean from predicted PDFs.

    The PDF is weighted with the bin centers and integrated over the bins
    using ``trapz``.

    Args:
        y_pdf: Tensor containing the predicted PDFs.
        bins: The bin boundaries corresponding to the predictions.
        bin_axis: The index of the tensor axis which contains the
            predictions for each bin. Forced to 0 for rank-1 input.

    Return:
        Tensor with rank reduced by one compared to ``y_pdf`` and with the
        values along ``bin_axis`` of ``y_pdf`` replaced with the mean value
        of the PDF.
    """
    rank = len(y_pdf.shape)
    if rank == 1:
        bin_axis = 0
    _check_dimensions(y_pdf.shape[bin_axis], len(bins))
    xp = get_array_module(y_pdf)

    # Bin centers, shaped so that they broadcast along ``bin_axis``.
    centers_shape = [1] * rank
    centers_shape[bin_axis] = -1
    centers = reshape(xp, 0.5 * (bins[1:] + bins[:-1]), centers_shape)

    return trapz(xp, centers * y_pdf, bins, bin_axis)
def posterior_cdf(y_pdf, bins, bin_axis=1):
    """
    Calculate the CDF from a predicted probability density function.

    The PDF is integrated cumulatively with ``cumtrapz`` and the result is
    divided by its last value along ``bin_axis``.

    Args:
        y_pdf: Tensor containing the predicted PDFs.
        bins: The bin boundaries corresponding to the predictions.
        bin_axis: The index of the tensor axis which contains the
            predictions for each bin. Forced to 0 for rank-1 input.

    Return:
        Tensor containing the normalized cumulative integral of ``y_pdf``
        along ``bin_axis``.
    """
    rank = len(y_pdf.shape)
    if rank == 1:
        bin_axis = 0
    _check_dimensions(y_pdf.shape[bin_axis], len(bins))
    xp = get_array_module(y_pdf)

    cumulative = cumtrapz(xp, y_pdf, bins, bin_axis)

    # Keep the last entry along ``bin_axis`` as a size-1 slice so that it
    # broadcasts against the full cumulative integral.
    last = [slice(0, None)] * rank
    last[bin_axis] = slice(-1, None)
    return cumulative / cumulative[tuple(last)]
def calculate_probability(y_pred, bins):
    """
    Evaluate ``qd.probability_larger_than`` for a tensor of predictions.

    ``self`` and the threshold ``y`` are free variables taken from the
    enclosing scope.

    Args:
        y_pred: Tensor containing the predictions.
        bins: The bin boundaries corresponding to ``y_pred``.

    Return:
        The result of ``qd.probability_larger_than``.
    """
    xp = get_array_module(y_pred)
    # Convert the bin boundaries to the same array type as the prediction.
    bin_edges = to_array(xp, bins, like=y_pred)
    return qd.probability_larger_than(
        y_pred,
        bin_edges,
        y,
        bin_axis=self.bin_axis,
    )
def probability_less_than(y_pdf, bins, y, bin_axis=1):
    """
    Calculate the probability of a sample being less than a given value
    for a tensor of predicted PDFs.

    Bins whose center lies below ``y`` are kept, all others are zeroed,
    and the masked PDF is integrated over the bins using ``trapz``.

    Args:
        y_pdf: Tensor containing the predicted PDFs.
        bins: The bin boundaries corresponding to the predictions.
        y: The sample value.
        bin_axis: The index of the tensor axis which contains the
            predictions for each bin. Forced to 0 for rank-1 input.

    Return:
        Tensor with rank reduced by one compared to ``y_pdf`` and with the
        values along ``bin_axis`` of ``y_pdf`` replaced with the
        probability that a sample of the distribution is smaller than the
        given value ``y``.
    """
    rank = len(y_pdf.shape)
    if rank == 1:
        bin_axis = 0
    _check_dimensions(y_pdf.shape[bin_axis], len(bins))
    xp = get_array_module(y_pdf)

    centers = 0.5 * (bins[1:] + bins[:-1])

    # Shape the mask so that it broadcasts along ``bin_axis``.
    mask_shape = [1] * rank
    mask_shape[bin_axis] = -1
    below = as_type(xp, reshape(xp, centers < y, mask_shape), y_pdf)

    return trapz(xp, below * y_pdf, bins, bin_axis)
def calculate_quantile_function(y_pred, bins):
    """
    Evaluate ``qd.quantile_function`` for a tensor of predictions.

    ``self`` and ``y`` are free variables taken from the enclosing scope.

    Args:
        y_pred: Tensor containing the predictions.
        bins: The bin boundaries corresponding to ``y_pred``.

    Return:
        The result of ``qd.quantile_function``.
    """
    xp = get_array_module(y_pred)
    # Convert the bin boundaries to the same array type as the prediction.
    bin_edges = to_array(xp, bins, like=y_pred)
    return qd.quantile_function(
        y_pred,
        y,
        bin_edges,
        bin_axis=self.bin_axis,
    )
def calculate_samples(y_pred, bins):
    """
    Draw posterior samples for a tensor of predictions over bins.

    ``self`` and ``n_samples`` are free variables taken from the
    enclosing scope.

    Args:
        y_pred: Tensor containing the predictions.
        bins: The bin boundaries corresponding to ``y_pred``.

    Return:
        The result of ``qd.sample_posterior``.
    """
    xp = get_array_module(y_pred)
    # Convert the bin boundaries to the same array type as the prediction.
    bin_edges = to_array(xp, bins, like=y_pred)
    return qd.sample_posterior(
        y_pred,
        bin_edges,
        n_samples=n_samples,
        bin_axis=self.bin_axis,
    )
def sample_posterior_gaussian(y_pred, quantiles, n_samples=1, quantile_axis=1):
    """
    Sample the posterior distribution described by the predicted
    quantiles using a Gaussian approximation.

    A Gaussian is fitted to the predicted quantiles with
    ``fit_gaussian_to_quantiles``; samples drawn with ``sample_gaussian``
    are then scaled by the fitted standard deviation and shifted by the
    fitted mean.

    Args:
        y_pred: A rank-k tensor containing the predicted quantiles along
            the axis specified by ``quantile_axis``.
        quantiles: The quantile fractions corresponding to the predicted
            quantiles.
        n_samples: How many samples to generate for each prediction.
        quantile_axis: The axis in ``y_pred`` along which the predicted
            quantiles are found. Forced to 0 for rank-1 input.

    Returns:
        A rank-k tensor with the values along ``quantile_axis`` replaced by
        samples of the posterior distribution.
    """
    dims = list(y_pred.shape)
    if len(dims) == 1:
        quantile_axis = 0
    xp = get_array_module(y_pred)

    mean, std = fit_gaussian_to_quantiles(
        y_pred, quantiles, quantile_axis=quantile_axis
    )

    # Same shape as the input except for ``n_samples`` entries along the
    # quantile axis.
    dims[quantile_axis] = n_samples
    noise = sample_gaussian(xp, tuple(dims))

    return mean + std * noise
def calculate_crps(y_pred):
    """
    Evaluate ``qq.crps`` for a tensor of predicted quantiles.

    ``self`` and ``y_true`` are free variables taken from the enclosing
    scope.

    Args:
        y_pred: Tensor containing the predicted quantiles.

    Return:
        The result of ``qq.crps`` for ``y_pred`` and ``y_true``.
    """
    xp = get_array_module(y_pred)
    # Convert the quantile fractions to the same array type as the prediction.
    taus = to_array(xp, self.quantiles, like=y_pred)
    return qq.crps(
        y_pred,
        y_true,
        taus,
        quantile_axis=self.quantile_axis,
    )
def test_to_array(backend):
    """
    Convert a numpy array with ``to_array`` and check that the array
    module of the result is the requested backend.
    """
    source = np.arange(10)
    converted = to_array(backend, source)
    assert get_array_module(converted) == backend
def test_get_array_module(backend):
    """
    Check that ``get_array_module`` returns the backend module for an
    array created with the ``ones`` function of that module.
    """
    ones = backend.ones(10)
    assert get_array_module(ones) == backend
def add(y_pdf_1, bins_1, y_pdf_2, bins_2, bins_out, bin_axis=1):
    """
    Calculate the discretized PDF of the sum of two random variables
    represented by their respective discretized PDFs.

    For every bin of the first variable a random position inside that bin
    is drawn; the bin centers of the second variable are shifted by it,
    located in ``bins_out`` and the products of the bin probabilities are
    accumulated at those locations. The result therefore depends on the
    random numbers returned by ``sample_uniform``.

    Args:
        y_pdf_1: The discretized PDF of the first random variable.
        bins_1: The bin boundaries corresponding to 'y_pdf_1'.
        y_pdf_2: The discretized PDF of the second random variable.
        bins_2: The bin boundaries corresponding to 'y_pdf_2'.
        bins_out: The bins boundaries for the resulting discretized PDF.
        bin_axis: The dimension along which the probabilities are oriented.
            Forced to 0 when ``y_pdf_1`` has rank 1.

    Return:
        A tensor containing the discretized PDF corresponding to the sum of
        the two given PDFs, as returned by ``normalize``.
    """
    if len(y_pdf_1.shape) == 1:
        bin_axis = 0
    xp = get_array_module(y_pdf_1)

    # Convert the first density to per-bin probabilities (density * width).
    bins_1_c = 0.5 * (bins_1[1:] + bins_1[:-1])
    dx_1 = bins_1[1:] - bins_1[:-1]
    shape_1 = [1] * len(y_pdf_1.shape)
    shape_1[bin_axis] = numel(bins_1) - 1
    dx_1 = dx_1.reshape(shape_1)
    p_1 = y_pdf_1 * dx_1

    # Same conversion for the second density.
    bins_2_c = 0.5 * (bins_2[1:] + bins_2[:-1])
    dx_2 = bins_2[1:] - bins_2[:-1]
    shape_2 = [1] * len(y_pdf_2.shape)
    shape_2[bin_axis] = numel(bins_2) - 1
    dx_2 = dx_2.reshape(shape_2)
    p_2 = y_pdf_2 * dx_2

    # Accumulator with one entry per output bin along ``bin_axis``.
    out_shape = list(y_pdf_1.shape)
    out_shape[bin_axis] = numel(bins_out) - 1
    p_out = zeros(xp, out_shape, like=y_pdf_1)

    rank = len(y_pdf_1.shape)
    selection = [slice(0, None)] * rank

    n_bins = numel(bins_1_c)
    # One random offset per bin of the first variable
    # (presumably uniform in [0, 1) -- confirm against sample_uniform).
    offsets = sample_uniform(xp, (n_bins,), like=bins_2)
    for i in range(n_bins):
        # Random position inside the i-th bin of the first variable.
        d_b = bins_1[i + 1] - bins_1[i]
        b = bins_1[i] + offsets[i] * d_b
        selection[bin_axis] = i
        # Values of the sum for every bin center of the second variable.
        bins = bins_2_c + b
        # NOTE(review): the integer index drops ``bin_axis`` from p_1 before
        # the product with p_2 -- confirm the broadcasting for rank > 1.
        probs = p_1[tuple(selection)] * p_2
        # Output bin for each shifted center; shifted by one to be 0-based.
        inds = digitize(xp, bins, bins_out) - 1
        p_out = scatter_add(xp, p_out, inds, probs, bin_axis)
    return normalize(p_out, bins_out, bin_axis=bin_axis)
def sample_posterior(y_pred, bins, n_samples=1, bin_axis=1):
    """
    Sample the posterior distribution described by the predicted PDF.

    The sampling is performed by interpolating the inverse of the
    cumulative distribution function to values sampled from a uniform
    distribution.

    Args:
        y_pred: A rank-k tensor containing the predicted bin-probabilities
            along the axis specified by ``bin_axis``.
        bins: The bin boundaries corresponding to the predicted bin
            probabilities.
        n_samples: How many samples to generate for each prediction.
        bin_axis: The axis in y_pred along which the predicted bin
            probabilities are located. Forced to 0 for rank-1 input.

    Returns:
        A rank-k tensor with the values along ``bin_axis`` replaced by
        samples of the posterior distribution.
    """
    if len(y_pred.shape) == 1:
        bin_axis = 0
    xp = get_array_module(y_pred)
    n_dims = len(y_pred.shape)
    y_cdf = posterior_cdf(y_pred, bins, bin_axis=bin_axis)

    # Number of bin boundaries (not the number of bins).
    n_bins = len(bins)

    output_shape = list(y_cdf.shape)
    output_shape[bin_axis] = n_samples
    results = zeros(xp, output_shape, like=y_pred)

    # Left node of the first interval: CDF value and boundary.
    y_index = [slice(0, None)] * n_dims
    y_index[bin_axis] = slice(0, 1)
    y_l = y_cdf[tuple(y_index)]
    b_l = bins[0]

    samples = as_type(xp, sample_uniform(xp, tuple(output_shape)), y_cdf)

    for i in range(1, n_bins):
        y_index = [slice(0, None)] * n_dims
        y_index[bin_axis] = slice(i, i + 1)
        y_r = y_cdf[tuple(y_index)]
        b_r = bins[i]

        # 1 where the sample falls into the CDF interval (y_l, y_r].
        mask = as_type(xp, (y_l < samples) * (y_r >= samples), y_l)
        # Linear interpolation between b_l and b_r for the masked entries.
        results += b_l * (y_r - samples) * mask
        results += b_r * (samples - y_l) * mask
        # The divisor is (y_r - y_l) for masked entries and 1 elsewhere, so
        # entries outside the current interval are left unchanged.
        results /= mask * (y_r - y_l) + (1.0 - mask)

        b_l = b_r
        y_l = y_r

    # Samples above the last CDF value are assigned the last boundary.
    mask = as_type(xp, y_r < samples, y_r)
    results += mask * b_r
    return results
def calculate_quantiles(y_pred):
    """
    Evaluate ``qq.posterior_quantiles`` for a tensor of predicted
    quantiles.

    ``self`` and the requested ``quantiles`` are free variables taken from
    the enclosing scope.

    Args:
        y_pred: Tensor containing the predicted quantiles.

    Return:
        The result of ``qq.posterior_quantiles``.
    """
    xp = get_array_module(y_pred)
    # Both sets of fractions are converted to the array type of ``y_pred``.
    requested = to_array(xp, quantiles, like=y_pred)
    predicted = to_array(xp, self.quantiles, like=y_pred)
    return qq.posterior_quantiles(
        y_pred,
        quantiles=predicted,
        new_quantiles=requested,
        quantile_axis=self.quantile_axis,
    )
def posterior_maximum(y_pred, quantiles, quantile_axis=1):
    """
    Find the position of the maximum of the PDF derived from predicted
    quantiles.

    The PDF is obtained from ``pdf``; the position at which its value is
    largest along ``quantile_axis`` is returned.

    Args:
        y_pred: Tensor containing the predicted quantiles along
            ``quantile_axis``.
        quantiles: The quantile fractions corresponding to the predicted
            quantiles.
        quantile_axis: The axis along which the quantiles are located.
            Forced to 0 for rank-1 input.

    Return:
        Tensor with the shape of the argmax result, containing the PDF
        positions at which the PDF values are largest.
    """
    if len(y_pred.shape) == 1:
        quantile_axis = 0
    xp = get_array_module(y_pred)

    positions, densities = pdf(y_pred, quantiles, quantile_axis=quantile_axis)

    peak = argmax(xp, densities, axes=quantile_axis)
    reduced_shape = peak.shape
    # Restore the reduced axis so the indices can be used for gathering.
    peak = expand_dims(xp, peak, quantile_axis)

    gathered = take_along_axis(xp, positions, peak, axis=quantile_axis)
    return gathered.reshape(reduced_shape)
def probability_less_than(y_pred, quantiles, y, quantile_axis=1):
    """
    Calculate the probability that the predicted value is less than a
    given threshold value ``y`` given a tensor of predicted quantiles
    ``y_pred``.

    The probability :math:`P(Y < y)` is calculated by using the predicted
    quantiles to estimate the CDF of the posterior distribution, which is
    then interpolated to the given threshold value.

    Args:
        y_pred: A rank-k tensor containing the predicted quantiles along
            the axis specified by ``quantile_axis``.
        quantiles: The quantile fractions corresponding to the predicted
            quantiles.
        y: The threshold value.
        quantile_axis: The axis in y_pred along which the predicted
            quantiles are found. Forced to 0 for rank-1 input.

    Returns:
        A rank-(k-1) tensor containing for each set of predicted quantiles
        the estimated probability of the true value being smaller than the
        given threshold.
    """
    if len(y_pred.shape) == 1:
        quantile_axis = 0
    xp = get_array_module(y_pred)
    n_dims = len(y_pred.shape)
    x_cdf, y_cdf = cdf(y_pred, quantiles, quantile_axis=quantile_axis)

    # The output drops the quantile axis.
    output_shape = list(x_cdf.shape)
    del output_shape[quantile_axis]
    probabilities = xp.zeros(output_shape)

    # Left node of the first CDF segment.
    y_l = y_cdf[0]
    x_index = [slice(0, None)] * n_dims
    x_index[quantile_axis] = 0
    x_l = x_cdf[tuple(x_index)]

    for i in range(1, len(y_cdf)):
        y_r = y_cdf[i]
        x_index[quantile_axis] = i
        x_r = x_cdf[tuple(x_index)]

        # 1 where the threshold falls into the segment (x_l, x_r].
        mask = as_type(xp, (x_l < y) * (x_r >= y), x_l)
        # Linear interpolation of the CDF between the two segment nodes.
        probabilities += y_l * (x_r - y) * mask
        probabilities += y_r * (y - x_l) * mask
        # The divisor is (x_r - x_l) for masked entries and 1 elsewhere, so
        # entries outside the current segment are left unchanged.
        probabilities /= (mask * (x_r - x_l) + (1.0 - mask))

        y_l = y_r
        x_l = x_r

    # Thresholds above the last CDF node get probability 1.
    mask = as_type(xp, x_r < y, x_r)
    probabilities += mask
    return probabilities
def posterior_quantiles(y_pred, quantiles, new_quantiles, quantile_axis=1):
    r"""
    Interpolate predicted quantiles to a new set of quantile fractions.

    Each requested fraction is obtained by linear interpolation between the
    two predicted quantiles whose fractions enclose it. Fractions below the
    first predicted fraction are mapped to the first predicted quantile;
    fractions at or above the last predicted fraction are mapped to the
    last predicted quantile.

    Args:
        y_pred: A rank-k tensor of predicted quantiles with the quantiles
            located along the axis given by ``quantile_axis``.
        quantiles: The quantile fractions corresponding to the quantiles
            located along the quantile axis.
        new_quantiles: The quantile fractions for which to compute the
            posterior quantiles.
        quantile_axis: The axis along which the quantiles are located.
            Forced to 0 for rank-1 input.

    Returns:
        Rank-k tensor in which the values along ``quantile_axis`` are
        replaced by one entry per element of ``new_quantiles``.
    """
    if len(y_pred.shape) == 1:
        quantile_axis = 0
    xp = get_array_module(y_pred)
    n = len(y_pred.shape)
    indices = arange(xp, 0, len(quantiles), 1.0)

    y_qs = []
    for q in new_quantiles:
        selection = [slice(0, None)] * n

        # Locate the interval [quantiles[i], quantiles[i + 1]) containing q.
        mask = (quantiles[1:] > q) * (quantiles[:-1] <= q)
        index = indices[:-1][mask]

        if len(index) == 0:
            # q lies outside the predicted range: clamp to the nearest
            # predicted quantile instead of aborting the whole computation.
            selection[quantile_axis] = 0 if q < quantiles[0] else -1
            y_q = y_pred[tuple(selection)]
        else:
            index = int(index[0])
            d = quantiles[index + 1] - quantiles[index]
            w_l = (quantiles[index + 1] - q) / d
            w_r = (q - quantiles[index]) / d

            selection[quantile_axis] = index
            selection_l = tuple(selection)
            selection[quantile_axis] = index + 1
            selection_r = tuple(selection)

            y_q = w_l * y_pred[selection_l] + w_r * y_pred[selection_r]

        y_qs.append(expand_dims(xp, y_q, quantile_axis))
    return concatenate(xp, y_qs, quantile_axis)
def fit_gaussian_to_quantiles(y_pred, quantiles, quantile_axis=1):
    """
    Fits Gaussian distributions to predicted quantiles.

    Fits mean and standard deviation values to quantiles by minimizing
    the mean squared distance of the predicted quantiles and those of
    the corresponding Gaussian distribution. Because the problem is
    linear in ``mu`` and ``sigma``, a single Newton step from the starting
    point ``(mu, sigma) = (0, 1)`` yields the minimizer.

    Args:
        y_pred: A rank-k tensor containing the predicted quantiles along
            the axis specified by ``quantile_axis``.
        quantiles: Array of shape `(m,)` containing the quantile
            fractions corresponding to the predictions in ``y_pred``.
        quantile_axis: The axis in ``y_pred`` along which the predicted
            quantiles are found. Forced to 0 for rank-1 input.

    Returns:
        Tuple ``(mu, sigma)`` of tensors with the shape of ``y_pred`` except
        for a size of 1 along ``quantile_axis``, containing the mean and
        standard deviations of the Gaussian distributions corresponding to
        the predictions in ``y_pred``.
    """
    if len(y_pred.shape) == 1:
        quantile_axis = 0
    xp = get_array_module(y_pred)

    # Quantiles of the reference distribution given by ``norm``
    # (presumably scipy.stats.norm, i.e. the standard normal -- confirm
    # against the file's imports), shaped to broadcast along the quantile
    # axis.
    x = to_array(xp, norm.ppf(quantiles))
    n_dims = len(y_pred.shape)
    x_shape = [1] * n_dims
    x_shape[quantile_axis] = -1
    x_shape = tuple(x_shape)
    x = reshape(xp, x, x_shape)

    output_shape = list(y_pred.shape)
    output_shape[quantile_axis] = 1
    output_shape = tuple(output_shape)

    # Hessian of the squared error w.r.t. (mu, sigma).
    d2e_00 = numel(x)
    d2e_01 = x.sum()
    d2e_10 = d2e_01
    d2e_11 = (x**2).sum()

    # Inverse of the 2x2 Hessian. The determinant is
    # d2e_00 * d2e_11 - d2e_01 * d2e_10.
    d2e_det_inv = 1.0 / (d2e_00 * d2e_11 - d2e_01 * d2e_10)
    d2e_inv_00 = d2e_det_inv * d2e_11
    d2e_inv_01 = -d2e_det_inv * d2e_01
    d2e_inv_10 = -d2e_det_inv * d2e_10
    d2e_inv_11 = d2e_det_inv * d2e_00

    # Gradient of the squared error at (mu, sigma) = (0, 1).
    de_0 = reshape(xp, -(y_pred - x).sum(axis=quantile_axis), output_shape)
    de_1 = reshape(xp, -(x * (y_pred - x)).sum(axis=quantile_axis), output_shape)

    # Newton step from the starting point (0, 1).
    mu = -(d2e_inv_00 * de_0 + d2e_inv_01 * de_1)
    sigma = 1.0 - (d2e_inv_10 * de_0 + d2e_inv_11 * de_1)

    return mu, sigma
def __call__(self, x, dist_axis=1):
    """
    Evaluate the a priori.

    Args:
        x: Tensor containing the values at which to evaluate the a priori.
        dist_axis: The axis along which the tensor x is sorted. Forced to
            0 for rank-1 input.

    Returns:
        Tensor with the same size as 'x' containing the values of the a
        priori at 'x' obtained by linear interpolation between the nodes
        ``self.x`` with values ``self.y``. Values of 'x' that fall into no
        interval between two nodes yield 0.
    """
    if len(x.shape) == 1:
        dist_axis = 0
    xp = get_array_module(x)
    n_dims = len(x.shape)

    n = x.shape[dist_axis]
    x_index = [slice(0, None)] * n_dims
    x_index[dist_axis] = 0

    # Selections for the left and right nodes of every interval.
    selection_l = [slice(0, None)] * n_dims
    selection_l[dist_axis] = slice(0, -1)
    selection_l = tuple(selection_l)
    selection_r = [slice(0, None)] * n_dims
    selection_r[dist_axis] = slice(1, None)
    selection_r = tuple(selection_r)

    # Reshape the table so that its nodes lie along ``dist_axis``.
    r_shape = [1] * n_dims
    r_shape[dist_axis] = -1
    r_x = self.x.reshape(r_shape)
    r_y = self.y.reshape(r_shape)

    r_x_l = r_x[selection_l]
    r_x_r = r_x[selection_r]
    r_y_l = r_y[selection_l]
    r_y_r = r_y[selection_r]

    rs = []

    # Process one slice of ``x`` along ``dist_axis`` at a time.
    for i in range(0, n):
        x_index[dist_axis] = slice(i, i + 1)
        index = tuple(x_index)
        x_i = x[index]

        # 1 for the interval (r_x_l, r_x_r] that contains x_i.
        mask = as_type(xp, (r_x_l < x_i) * (r_x_r >= x_i), x_i)
        # Linear interpolation inside the selected interval.
        r = r_y_l * (r_x_r - x_i) * mask
        r += r_y_r * (x_i - r_x_l) * mask
        # The divisor is the interval width where the mask is set and 1
        # elsewhere.
        r /= mask * (r_x_r - r_x_l) + (1.0 - mask)
        # Collapse the interval axis and restore it with size 1.
        r = expand_dims(xp, r.sum(dist_axis), dist_axis)
        rs.append(r)

    r = concatenate(xp, rs, dist_axis)
    return r
def pdf(self, y_pred):
    """
    Calculate PDF from predicted quantiles.

    Args:
        y_pred: Tensor containing the quantiles predicted by the NN
            model.

    Return:
        The result of ``qq.pdf`` for ``y_pred`` using this object's
        quantile fractions and quantile axis.
    """
    xp = get_array_module(y_pred)
    # Convert the quantile fractions to the same array type as the prediction.
    taus = to_array(xp, self.quantiles, like=y_pred)
    return qq.pdf(
        y_pred,
        taus,
        quantile_axis=self.quantile_axis,
    )
def pdf(self, y_pred):
    """
    Calculate PDF from predicted logits.

    Args:
        y_pred: Tensor containing the logit values predicted by the
            neural network model.

    Return:
        The result of ``qd.pdf`` for ``y_pred`` using this object's bins
        and bin axis.
    """
    xp = get_array_module(y_pred)
    # Convert the bin boundaries to the same array type as the prediction.
    bin_edges = to_array(xp, self.bins, like=y_pred)
    return qd.pdf(
        y_pred,
        bin_edges,
        bin_axis=self.bin_axis,
    )
def __call__(self, x, dist_axis=1):
    """
    Evaluate the a priori.

    Computes ``exp(-0.5 * sum(dx * sdx))`` where ``dx`` is the difference
    between ``x`` and ``self.x_a`` and ``sdx`` is the tensor product of
    ``dx`` with ``self.s``.

    Args:
        x: Tensor containing the values at which to evaluate the a priori.
        dist_axis: Not used by this implementation; the axis stored in
            ``self.dist_axis`` is used instead.

    Return:
        Tensor containing the values of the a priori, with the axis
        ``self.dist_axis`` summed out.
    """
    xp = get_array_module(x)

    # Broadcast the stored reference values along ``self.dist_axis``.
    broadcast_shape = [1] * len(x.shape)
    broadcast_shape[self.dist_axis] = -1
    reference = self.x_a.reshape(broadcast_shape)

    delta = x - reference
    weighted = tensordot(xp, delta, self.s, ((self.dist_axis, ), (-1, )))
    exponent = -0.5 * (delta * weighted).sum(self.dist_axis)
    return exp(xp, exponent)
def sample_posterior(y_pred, quantiles, n_samples=1, quantile_axis=1):
    """
    Sample the posterior distribution described by the predicted
    quantiles.

    The sampling is performed by interpolating the inverse of the
    cumulative distribution function to values sampled from a uniform
    distribution.

    Args:
        y_pred: A rank-k tensor containing the predicted quantiles along
            the axis specified by ``quantile_axis``.
        quantiles: The quantile fractions corresponding to the predicted
            quantiles.
        n_samples: How many samples to generate for each prediction.
        quantile_axis: The axis in y_pred along which the predicted
            quantiles are found. Forced to 0 for rank-1 input.

    Returns:
        A rank-k tensor with the values along ``quantile_axis`` replaced by
        samples of the posterior distribution.
    """
    if len(y_pred.shape) == 1:
        quantile_axis = 0
    xp = get_array_module(y_pred)
    n_dims = len(y_pred.shape)
    x_cdf, y_cdf = cdf(y_pred, quantiles, quantile_axis=quantile_axis)

    output_shape = list(y_pred.shape)
    output_shape[quantile_axis] = n_samples

    samples = sample_uniform(xp, tuple(output_shape))
    results = xp.zeros(samples.shape)

    # Left node of the first CDF segment.
    y_l = y_cdf[0]
    x_index = [slice(0, None)] * n_dims
    x_index[quantile_axis] = slice(0, 1)
    x_l = x_cdf[tuple(x_index)]

    for i in range(1, len(y_cdf)):
        y_r = y_cdf[i]
        x_index[quantile_axis] = slice(i, i + 1)
        x_r = x_cdf[tuple(x_index)]

        # 1 where the sample falls into the CDF segment (y_l, y_r].
        mask = as_type(xp, (samples > y_l) * (samples <= y_r), y_l)
        # Linear interpolation of the inverse CDF inside the segment.
        results += (x_l * (y_r - samples)) * mask
        results += (x_r * (samples - y_l)) * mask
        # The divisor is (y_r - y_l) for masked entries and 1 elsewhere, so
        # entries outside the current segment are left unchanged.
        results /= (mask * (y_r - y_l) + (1.0 - mask))

        y_l = y_r
        x_l = x_r

    return results
def crps(y_pdf, y_true, bins, bin_axis=1):
    r"""
    Compute the Continuous Ranked Probability Score (CRPS) for a given
    discrete probability density.

    This function uses the posterior CDF obtained from the predicted PDF
    in :code:`y_pdf` to approximate the continuous ranked probability
    score (CRPS):

    .. math::
        CRPS(\mathbf{y}, x) = \int_{-\infty}^\infty (F_{x | \mathbf{y}}(x')
        - \mathrm{1}_{x < x'})^2 \: dx'

    Args:
        y_pdf: Tensor containing the predicted discrete posterior PDF with
            the probabilities for different bins oriented along axis
            ``bin_axis``.
        y_true: Array containing the true point values. If its rank is
            lower than that of ``y_pdf``, a dimension is inserted at
            ``bin_axis`` so that it broadcasts against the bins.
        bins: 1D array containing the bins corresponding to the
            probabilities in ``y_pdf``.
        bin_axis: The index of the tensor axis which contains the
            predictions for each bin. Forced to 0 for rank-1 input.

    Returns:
        Tensor of rank :math:`k - 1` containing the CRPS values for each of
        the predictions in ``y_pdf``.
    """
    if len(y_pdf.shape) == 1:
        bin_axis = 0
    n_y = y_pdf.shape[bin_axis]
    n_b = len(bins)
    n_dims = len(y_pdf.shape)
    _check_dimensions(n_y, n_b)
    xp = get_array_module(y_pdf)

    y_cdf = posterior_cdf(y_pdf, bins, bin_axis=bin_axis)

    # Bin boundaries shaped to broadcast along ``bin_axis``.
    shape = [1] * n_dims
    shape[bin_axis] = -1
    x = bins.reshape(shape)

    if len(y_true.shape) < len(y_pdf.shape):
        # Use the backend-agnostic helper so that array types without an
        # ``unsqueeze`` method are supported as well.
        y_true = expand_dims(xp, y_true, bin_axis)

    # Indicator function 1_{x > y_true} in the array type of the CDF.
    indicator = as_type(xp, x > y_true, y_cdf)
    return trapz(xp, (y_cdf - indicator) ** 2, x, bin_axis)
def crps(self, y_pred, y_true):
    """
    Calculate the CRPS score from predictions over this object's bins.

    Args:
        y_pred: Tensor containing the logit values predicted by the
            neural network model.
        y_true: Tensor containing the true values.

    Return:
        The result of ``qd.crps`` for ``y_pred`` and ``y_true`` using this
        object's bins and bin axis.
    """
    xp = get_array_module(y_pred)
    # Convert the bin boundaries to the same array type as the prediction.
    bin_edges = to_array(xp, self.bins, like=y_pred)
    return qd.crps(y_pred, y_true, bin_edges, bin_axis=self.bin_axis)
def posterior_mean(self, y_pred):
    """
    Calculate the posterior mean from predictions over this object's
    bins.

    Args:
        y_pred: Tensor containing the logit values predicted by the
            neural network model.

    Return:
        The result of ``qd.posterior_mean`` for ``y_pred`` using this
        object's bins and bin axis.
    """
    xp = get_array_module(y_pred)
    # Convert the bin boundaries to the same array type as the prediction.
    bin_edges = to_array(xp, self.bins, like=y_pred)
    return qd.posterior_mean(y_pred, bin_edges, bin_axis=self.bin_axis)