def probability_less_than(self, x=None, y=None, y_pred=None, **kwargs):
    """
    Calculate the probability of the output value being smaller than a
    given numeric threshold.

    Args:
        x: Rank-k tensor containing the input data with the input channels
            (or features) for each sample located along its first
            dimension. Only used when ``y_pred`` is not provided.
        y: The threshold value.
        y_pred: Optional pre-computed quantile predictions, which, when
            provided, will be used to avoid repeated propagation of the
            inputs through the network.
        **kwargs: Accepted but not used by this method.

    Returns:
        Tensor of rank k-1 containing for each input sample the
        probability of the corresponding y-value to be smaller than the
        given threshold.

    Raises:
        ValueError: If neither ``x`` nor ``y_pred`` is provided, or if the
            threshold ``y`` is missing.
    """
    if y_pred is None and x is None:
        raise ValueError(
            "One of the input arguments x or y_pred must be "
            " provided.")
    # Validate the threshold before running the network so that a missing
    # argument fails fast with a clear message.
    if y is None:
        raise ValueError(
            "The y argument must be provided to compute the probability."
        )
    if y_pred is None:
        y_pred = self.predict(x)

    def calculate_prob(y_pred):
        # Convert the quantile fractions to the array type of the prediction.
        module = get_array_module(y_pred)
        quantiles = to_array(module, self.quantiles, like=y_pred)
        return qq.probability_less_than(
            y_pred, quantiles, y, quantile_axis=self.quantile_axis
        )

    return apply(calculate_prob, y_pred)
def predict(self, x):
    r"""
    Predict quantiles of the conditional distribution :math:`p(y|x)`.

    Forward propagates the inputs in ``x`` through the network. Each raw
    network output is passed through the ``invert`` method of its
    transformation, when one is given, and then through the ``predict``
    method of its loss to obtain the predicted quantiles ``y_pred``.

    Arguments:
        x(np.array): Rank-k tensor containing the input data with the
            input channels (or features) for each sample located along
            its first dimension.

    Returns:
        Rank-k tensor ``y_pred`` containing the quantiles of each input
        sample along its first dimension.
    """
    def to_quantiles(raw_output, loss, transformation):
        if transformation is None:
            restored = raw_output
        else:
            restored = transformation.invert(raw_output)
        return loss.predict(restored)

    network_output = self.model.predict(x)
    return apply(
        to_quantiles, network_output, self.losses, self.transformation
    )
def posterior_mean(self, x=None, y_pred=None, **kwargs):
    r"""
    Compute the posterior mean as the first moment of the predicted
    posterior CDF.

    Arguments:
        x: Rank-k tensor containing the input data with the input channels
            (or features) for each sample located along its first
            dimension. Only used when ``y_pred`` is not provided.
        y_pred: Optional pre-computed quantile predictions, which, when
            provided, will be used to avoid repeated propagation of the
            inputs through the network.
        **kwargs: Accepted but not used by this method.

    Returns:
        Tensor of rank k-1 containing the posterior means for all provided
        inputs.

    Raises:
        ValueError: If neither ``x`` nor ``y_pred`` is provided.
    """
    if y_pred is None and x is None:
        raise ValueError(
            "One of the input arguments x or y_pred must be "
            " provided.")
    predictions = self.predict(x) if y_pred is None else y_pred

    def mean_from_quantiles(prediction):
        # Quantile fractions must live in the same array module as the
        # prediction they are combined with.
        xp = get_array_module(prediction)
        taus = to_array(xp, self.quantiles, like=prediction)
        return qq.posterior_mean(
            prediction, taus, quantile_axis=self.quantile_axis
        )

    return apply(mean_from_quantiles, predictions)
def test_apply():
    """
    Check that ``apply`` yields the expected results for plain arguments
    and for dictionary arguments.
    """
    def double(x):
        return 2 * x

    def add(x, y):
        return x + y

    d = {i: i for i in range(5)}

    # Plain arguments: compare against the known results rather than
    # comparing each result with itself, which can never fail.
    assert apply(double, 1) == 2
    assert apply(add, 1, 1) == 2

    # Dictionary arguments: every key must map to the function applied to
    # the corresponding values. Exact comparison avoids the slack that an
    # integer division based check would allow.
    d_a = apply(double, d)
    d_b = apply(add, d, d)
    for k in d:
        assert d_a[k] == 2 * k
        assert d_b[k] == 2 * k
def aggregate_batches(self, batches):
    """
    Aggregate list of batches.

    Inputs are concatenated along axis 0. Outputs are either collected in
    a list or, when the batch outputs are dictionaries, in one list per
    key, and then concatenated along axis 0 as well. If ``self.shuffle``
    is set, inputs and outputs are permuted with the same random indices.

    Args:
        batches: Iterable of ``(x, y)`` batches to aggregate. All batches
            are expected to use the same output structure (all
            dictionaries or all plain tensors).

    Return:
        Tuple ``(x, y)`` containing the aggregated inputs and outputs in
        'batches'.

    Raises:
        ValueError: If ``batches`` contains no batch.
    """
    xs = []
    ys = None
    # Collect batches. Distinct names are used for the batch output and
    # the dictionary values so the loop variable is never rebound.
    for x_batch, y_batch in batches:
        xs.append(x_batch)
        if isinstance(y_batch, dict):
            if ys is None:
                ys = {}
            for key, value in y_batch.items():
                ys.setdefault(key, []).append(value)
        else:
            if ys is None:
                ys = []
            ys.append(y_batch)

    if not xs:
        raise ValueError("Cannot aggregate an empty sequence of batches.")

    # The tensor backend is determined lazily from the first input batch.
    if self.backend is None:
        self.backend = get_tensor_backend(xs[0])

    x = self.backend.concatenate(xs, 0)
    y = utils.apply(lambda parts: self.backend.concatenate(parts, 0), ys)

    if self.shuffle:
        # Use one permutation for inputs and outputs to keep them aligned.
        indices = self._rng.permutation(x.shape[0])
        x = x[indices]
        y = utils.apply(lambda tensor: tensor[indices], y)
    return x, y
def __init__(self, bins, mask=None):
    """
    Store the bins as float tensors and configure the loss reduction.

    Args:
        bins: Bin values. They are converted to ``torch.float`` tensors
            through ``apply``.
        mask: All values that are smaller than or equal to this value will
            be excluded from the calculation of the loss. When ``None``,
            the parent class is initialized with ``reduction="mean"``;
            otherwise the mask is stored as ``np.float32`` and
            ``reduction="none"`` is used.
    """
    def as_float_tensor(values):
        return torch.Tensor(values).to(torch.float)

    self.bins = apply(as_float_tensor, bins)

    has_mask = mask is not None
    self.mask = np.float32(mask) if has_mask else mask
    super().__init__(reduction="none" if has_mask else "mean")
def posterior_quantiles(self, x=None, y_pred=None, quantiles=None, key=None):
    r"""
    Compute the posterior quantiles.

    Arguments:
        x: Rank-k tensor containing the input data with the input channels
            (or features) for each sample located along its first
            dimension. Only used when ``y_pred`` is not provided.
        y_pred: Optional pre-computed predictions, which, when provided,
            will be used to avoid repeated propagation of the inputs
            through the network.
        quantiles: List of quantile fraction values
            :math:`\tau_i \in [0, 1]`.
        key: Optional key selecting the bins to use when ``self.bins`` is
            a dictionary. Ignored when ``self.bins`` is not a dictionary.

    Returns:
        Rank-k tensor containing the desired predicted quantiles along its
        first dimension.

    Raises:
        ValueError: If neither ``x`` nor ``y_pred`` is provided, or if
            ``quantiles`` is missing.
    """
    if y_pred is None and x is None:
        raise ValueError("One of the keyword arguments 'x' or 'y_pred'"
                         " must be provided.")
    # Checked before the forward pass so that a missing argument does not
    # cost a full prediction. The message previously lacked the space
    # between "to" and "calculate".
    if quantiles is None:
        raise ValueError(
            "The 'quantiles' keyword argument must be provided to "
            "calculate the posterior quantiles.")
    if y_pred is None:
        y_pred = self.predict(x)

    bins = self.bins
    if key is not None and isinstance(bins, dict):
        bins = bins[key]

    def calculate_quantiles(y_pred, bins):
        # Convert the bins to the array type of the prediction.
        module = get_array_module(y_pred)
        bins = to_array(module, bins, like=y_pred)
        return qd.posterior_quantiles(
            y_pred, bins, quantiles, bin_axis=self.bin_axis
        )

    return apply(calculate_quantiles, y_pred, bins)
def crps(self, x=None, y_pred=None, y_true=None, **kwargs):
    r"""
    Compute the Continuous Ranked Probability Score (CRPS).

    This function uses a piece-wise linear fit to the approximate
    posterior CDF obtained from the predicted quantiles in :code:`y_pred`
    to approximate the continuous ranked probability score (CRPS):

    .. math::
        \text{CRPS}(\mathbf{y}, x) = \int_{-\infty}^\infty
        (F_{x | \mathbf{y}}(x') - \mathrm{1}_{x < x'})^2 \: dx'

    The quantile fractions are taken from ``self.quantiles``.

    Arguments:
        x: Rank-k tensor containing the input data with the input channels
            (or features) for each sample located along its first
            dimension. Only used when ``y_pred`` is not provided.
        y_pred: Optional pre-computed quantile predictions, which, when
            provided, will be used to avoid repeated propagation of the
            inputs through the network.
        y_true: Array containing the `n` true values, i.e. samples of the
            true conditional distribution predicted by the QRNN.
        **kwargs: Accepted but not used by this method.

    Returns:
        Tensor of rank k-1 containing the CRPS values for each of the
        samples.

    Raises:
        ValueError: If neither ``x`` nor ``y_pred`` is provided, or if
            ``y_true`` is missing.
    """
    if y_pred is None and x is None:
        raise ValueError(
            "One of the input arguments x or y_pred must be "
            " provided.")
    predictions = self.predict(x) if y_pred is None else y_pred

    if y_true is None:
        raise ValueError(
            "The y_true argument must be provided to calculate "
            "the CRPS provided.")

    def crps_from_quantiles(prediction):
        xp = get_array_module(prediction)
        taus = to_array(xp, self.quantiles, like=prediction)
        return qq.crps(
            prediction, y_true, taus, quantile_axis=self.quantile_axis
        )

    return apply(crps_from_quantiles, predictions)
def cdf(self, x=None, y_pred=None, **kwargs):
    r"""
    Approximate the posterior CDF for given inputs ``x``.

    Propagates the inputs in ``x`` forward through the network and
    approximates the posterior CDF using a piecewise linear function.
    The piecewise linear function is given by its values at the quantiles
    :math:`y_{\tau_i}` for
    :math:`\tau = \{0.0, \tau_1, \ldots, \tau_k, 1.0\}` where
    :math:`\tau_i` are the quantile fractions to be predicted by the
    network. The values for :math:`y_{\tau={0.0}}` and
    :math:`y_{\tau={1.0}}` are computed using

    .. math::
        y_{\tau=0.0} = 2.0 x_{\tau_1} - x_{\tau_2}

        y_{\tau=1.0} = 2.0 x_{\tau_k} - x_{\tau_{k-1}}

    Arguments:
        x: Rank-k tensor containing the input data with the input channels
            (or features) for each sample located along its first
            dimension. Only used when ``y_pred`` is not provided.
        y_pred: Optional pre-computed quantile predictions, which, when
            provided, will be used to avoid repeated propagation of the
            inputs through the network.
        **kwargs: Accepted but not used by this method.

    Returns:
        Tuple ``(y_cdf, cdf)`` containing the abscissa-values ``y_cdf``
        and the ordinates values ``cdf`` of the piece-wise linear
        approximation of the CDF :math:`F(y)`.

    Raises:
        ValueError: If neither ``x`` nor ``y_pred`` is provided.
    """
    if y_pred is None and x is None:
        raise ValueError(
            "One of the input arguments x or y_pred must be "
            " provided.")
    predictions = y_pred if y_pred is not None else self.predict(x)

    def cdf_from_quantiles(prediction):
        xp = get_array_module(prediction)
        taus = to_array(xp, self.quantiles, like=prediction)
        return qq.cdf(prediction, taus, quantile_axis=self.quantile_axis)

    return apply(cdf_from_quantiles, predictions)
def sample_posterior(self, x=None, y_pred=None, n_samples=1, key=None):
    r"""
    Generate :code:`n_samples` samples from the predicted posterior
    distribution for the input vector :code:`x`.

    The sampling itself is delegated to ``qd.sample_posterior``, which
    receives the prediction together with the matching bins.

    Arguments:
        x: Rank-k tensor containing the input data with the input channels
            (or features) for each sample located along its first
            dimension. Only used when ``y_pred`` is not provided.
        y_pred: Optional pre-computed predictions, which, when provided,
            will be used to avoid repeated propagation of the inputs
            through the network.
        n_samples: The number of samples to generate.
        key: Optional key selecting the bins to use when ``self.bins`` is
            a dictionary. Ignored when ``self.bins`` is not a dictionary.

    Returns:
        Rank-k tensor containing the random samples for each input sample
        along the first dimension.

    Raises:
        ValueError: If neither ``x`` nor ``y_pred`` is provided.
    """
    if y_pred is None and x is None:
        raise ValueError(
            "One of the input arguments x or y_pred must be "
            " provided.")
    predictions = self.predict(x) if y_pred is None else y_pred

    # A key only selects a subset when the bins are stored per key.
    selected_bins = self.bins
    if key is not None and isinstance(selected_bins, dict):
        selected_bins = selected_bins[key]

    def draw_samples(prediction, bin_edges):
        xp = get_array_module(prediction)
        bin_edges = to_array(xp, bin_edges, like=prediction)
        return qd.sample_posterior(
            prediction, bin_edges, n_samples=n_samples, bin_axis=self.bin_axis
        )

    return apply(draw_samples, predictions, selected_bins)
def probability_larger_than(self, x=None, y=None, y_pred=None, key=None):
    """
    Calculate the probability of the output value being larger than a
    given numeric threshold.

    Args:
        x: Rank-k tensor containing the input data with the input channels
            (or features) for each sample located along its first
            dimension. Only used when ``y_pred`` is not provided.
        y: The threshold value.
        y_pred: Optional pre-computed predictions, which, when provided,
            will be used to avoid repeated propagation of the inputs
            through the network.
        key: Optional key selecting the bins to use when ``self.bins`` is
            a dictionary. Ignored when ``self.bins`` is not a dictionary.

    Returns:
        Tensor of rank k-1 containing for each input sample the
        probability of the corresponding y-value to be larger than the
        given threshold.

    Raises:
        ValueError: If neither ``x`` nor ``y_pred`` is provided, or if the
            threshold ``y`` is missing.
    """
    if y_pred is None and x is None:
        raise ValueError(
            "One of the input arguments x or y_pred must be "
            " provided.")
    predictions = self.predict(x) if y_pred is None else y_pred

    if y is None:
        raise ValueError("The y argument must be provided to compute the "
                         " probability.")

    # A key only selects a subset when the bins are stored per key.
    use_keyed_bins = key is not None and isinstance(self.bins, dict)
    selected_bins = self.bins[key] if use_keyed_bins else self.bins

    def probability_above(prediction, bin_edges):
        xp = get_array_module(prediction)
        bin_edges = to_array(xp, bin_edges, like=prediction)
        return qd.probability_larger_than(
            prediction, bin_edges, y, bin_axis=self.bin_axis
        )

    return apply(probability_above, predictions, selected_bins)
def quantile_function(self, x=None, y_pred=None, y=None, key=None):
    r"""
    Evaluate the quantile function at given y values.

    Arguments:
        x: Rank-k tensor containing the input data with the input channels
            (or features) for each sample located along its first
            dimension. Only used when ``y_pred`` is not provided.
        y_pred: Optional pre-computed predicted pdf, which, when provided,
            will be used to avoid repeated propagation of the inputs
            through the network.
        y: Rank-k tensor containing the values at which to evaluate the
            quantile function for each of the inputs in ``x``.
        key: Optional key selecting the bins to use when ``self.bins`` is
            a dictionary. Ignored when ``self.bins`` is not a dictionary.

    Returns:
        The values of the quantile function at ``y`` as computed by
        ``qd.quantile_function`` for each prediction.

    Raises:
        ValueError: If neither ``x`` nor ``y_pred`` is provided, or if
            ``y`` is missing.
    """
    if y_pred is None and x is None:
        raise ValueError(
            "One of the input arguments x or y_pred must be "
            " provided.")
    # Validate before the forward pass so that a missing argument fails
    # fast with a clear message.
    if y is None:
        raise ValueError(
            "The y argument must be provided to evaluate the quantile "
            "function."
        )
    if y_pred is None:
        y_pred = self.predict(x)

    bins = self.bins
    if key is not None and isinstance(bins, dict):
        bins = bins[key]

    def calculate_quantile_function(y_pred, bins):
        # Convert the bins to the array type of the prediction.
        module = get_array_module(y_pred)
        bins = to_array(module, bins, like=y_pred)
        return qd.quantile_function(y_pred, y, bins, bin_axis=self.bin_axis)

    return apply(calculate_quantile_function, y_pred, bins)
def crps(self, x=None, y_pred=None, y_true=None, key=None):
    r"""
    Calculate CRPS score for given reference values.

    Arguments:
        x: Rank-k tensor containing the input data with the input channels
            (or features) for each sample located along its first
            dimension. Only used when ``y_pred`` is not provided.
        y_pred: Optional pre-computed predictions, which, when provided,
            will be used to avoid repeated propagation of the inputs
            through the network.
        y_true: Rank-k tensor containing the true y values.
        key: Optional key selecting the bins to use when ``self.bins`` is
            a dictionary. Ignored when ``self.bins`` is not a dictionary.

    Returns:
        Rank-k tensor containing crps values for all samples in x.

    Raises:
        ValueError: If neither ``x`` nor ``y_pred`` is provided, or if
            ``y_true`` is missing.
    """
    if y_pred is None and x is None:
        raise ValueError(
            "One of the input arguments x or y_pred must be "
            " provided.")
    # The score cannot be computed without reference values; reject the
    # call before running the network.
    if y_true is None:
        raise ValueError(
            "The y_true argument must be provided to calculate the CRPS."
        )
    if y_pred is None:
        y_pred = self.predict(x)

    bins = self.bins
    if key is not None and isinstance(bins, dict):
        bins = bins[key]

    def calculate_crps(y_pred, bins):
        # Convert the bins to the array type of the prediction.
        module = get_array_module(y_pred)
        bins = to_array(module, bins, like=y_pred)
        return qd.crps(y_pred, y_true, bins, bin_axis=self.bin_axis)

    return apply(calculate_crps, y_pred, bins)
def pdf(self, x=None, y_pred=None, **kwargs):
    r"""
    Approximate the posterior probability density function (PDF) for
    given inputs ``x``.

    The PDF is approximated by computing the derivative of the piece-wise
    linear approximation of the CDF as computed by the
    :py:meth:`quantnn.QRNN.cdf` function.

    Arguments:
        x: Rank-k tensor containing the input data with the input channels
            (or features) for each sample located along its first
            dimension. Only used when ``y_pred`` is not provided.
        y_pred: Optional pre-computed quantile predictions, which, when
            provided, will be used to avoid repeated propagation of the
            inputs through the network.
        **kwargs: Accepted but not used by this method.

    Returns:
        Tuple (x_pdf, y_pdf) containing the array with shape `(n, k)`
        containing the x and y coordinates describing the PDF for the
        inputs in ``x``.

    Raises:
        ValueError: If neither ``x`` nor ``y_pred`` is provided.
    """
    if y_pred is None and x is None:
        raise ValueError(
            "One of the input arguments x or y_pred must be "
            " provided.")
    predictions = y_pred if y_pred is not None else self.predict(x)

    def pdf_from_quantiles(prediction):
        xp = get_array_module(prediction)
        taus = to_array(xp, self.quantiles, like=prediction)
        return qq.pdf(prediction, taus, quantile_axis=self.quantile_axis)

    return apply(pdf_from_quantiles, predictions)
def sample_posterior_gaussian_fit(self, x=None, y_pred=None, n_samples=1):
    r"""
    Generate :code:`n_samples` samples from the predicted posterior
    distribution for the input vector :code:`x`.

    The sampling is performed using a Gaussian fit to the predicted
    quantiles.

    Arguments:
        x: Rank-k tensor containing the input data with the input channels
            (or features) for each sample located along its first
            dimension. Only used when ``y_pred`` is not provided.
        y_pred: Optional pre-computed quantile predictions, which, when
            provided, will be used to avoid repeated propagation of the
            inputs through the network.
        n_samples(int): The number of samples to generate.

    Returns:
        The samples produced by ``qq.sample_posterior_gaussian`` for each
        prediction.
        NOTE(review): the exact shape of the result is defined by that
        helper - confirm there.

    Raises:
        ValueError: If neither ``x`` nor ``y_pred`` is provided.
    """
    if y_pred is None and x is None:
        raise ValueError(
            "One of the input arguments x or y_pred must be "
            " provided.")
    predictions = self.predict(x) if y_pred is None else y_pred

    def draw_gaussian_samples(prediction):
        xp = get_array_module(prediction)
        taus = to_array(xp, self.quantiles, like=prediction)
        return qq.sample_posterior_gaussian(
            prediction,
            taus,
            n_samples=n_samples,
            quantile_axis=self.quantile_axis,
        )

    return apply(draw_gaussian_samples, predictions)
def predict(self, x):
    """
    Run the model on ``x`` and post-process its raw predictions.

    Arguments:
        x: Input data that is passed unchanged to ``self.model.predict``.

    Returns:
        The result of applying ``self._post_process_prediction`` to the
        raw model output and ``self.bins`` through ``apply``.
    """
    raw_prediction = self.model.predict(x)
    return apply(
        self._post_process_prediction,
        raw_prediction,
        self.bins,
    )